James Cole - December 2019. Built with R version 3.5.2
This is Notebook contains the final brain age analysis of MS patient data and controls from the UCL cohort, the MAGNIMS consortium and the Imperial College London PET study (n=25). The analysis uses brain-predicted age difference (brain-PAD) to look at brain ageing in the context of MS. The brain-PAD values were generated in PRONTO, using an independent healthy (n=2001) training dataset, and the values were corrected for proportional bias using the intercept and slope of the age by brain-predicted age regression in the training dataset.

Set-up

Clear workspace, set colour palette

rm(list = ls()) ## clear workspace
ms.palette <- c("darkgreen", "darkorange", "red", "blue", "purple") # define MS colour scheme for groups
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.15.2
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] tidyr_1.0.0        survminer_0.4.3    ggpubr_0.2        
##  [4] magrittr_1.5       survival_2.43-3    stringr_1.3.1     
##  [7] sjPlot_2.8.2       sjmisc_2.8.2       scales_1.0.0      
## [10] RColorBrewer_1.1-2 qwraps2_0.4.1      psych_1.8.12      
## [13] pryr_0.1.4         ppcor_1.1          plyr_1.8.4        
## [16] plotrix_3.7-4      metafor_2.0-0      MASS_7.3-51.1     
## [19] lmerTest_3.0-1     lme4_1.1-21        Matrix_1.2-15     
## [22] lm.beta_1.5-1      knitr_1.21         jtools_1.1.1      
## [25] hier.part_1.0-4    gtools_3.8.1       gridExtra_2.3     
## [28] glmmTMB_0.2.3      ggplot2_3.2.1      ggstance_0.3.1    
## [31] emmeans_1.4.2      effects_4.1-4      dplyr_0.8.3       
## [34] data.table_1.11.8  cowplot_1.0.0      corrplot_0.84     
## [37] car_3.0-2          carData_3.0-2     
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-137      cmprsk_2.2-7      insight_0.8.0    
##  [4] numDeriv_2016.8-1 backports_1.1.3   tools_3.5.2      
##  [7] TMB_1.7.15        R6_2.3.0          sjlabelled_1.1.2 
## [10] DBI_1.0.0         lazyeval_0.2.1    colorspace_1.3-2 
## [13] nnet_7.3-12       withr_2.1.2       tidyselect_0.2.5 
## [16] mnormt_1.5-5      curl_3.2          compiler_3.5.2   
## [19] performance_0.4.3 cli_1.0.1         bayestestR_0.5.1 
## [22] survMisc_0.5.5    mvtnorm_1.0-8     digest_0.6.18    
## [25] foreign_0.8-71    minqa_1.2.4       rmarkdown_1.11   
## [28] rio_0.5.16        pkgconfig_2.0.2   htmltools_0.3.6  
## [31] rlang_0.4.0       readxl_1.2.0      generics_0.0.2   
## [34] zoo_1.8-4         zip_1.0.0         parameters_0.4.1 
## [37] Rcpp_1.0.2        munsell_0.5.0     abind_1.4-5      
## [40] lifecycle_0.1.0   stringi_1.2.4     yaml_2.2.0       
## [43] grid_3.5.2        parallel_3.5.2    forcats_0.3.0    
## [46] crayon_1.3.4      lattice_0.20-38   ggeffects_0.14.0 
## [49] haven_2.0.0       splines_3.5.2     sjstats_0.17.8   
## [52] hms_0.4.2         zeallot_0.1.0     pillar_1.3.1     
## [55] boot_1.3-21       estimability_1.3  effectsize_0.1.1 
## [58] codetools_0.2-16  glue_1.3.1        evaluate_0.12    
## [61] mitools_2.4       modelr_0.1.4      vctrs_0.2.0      
## [64] nloptr_1.2.1      cellranger_1.1.0  gtable_0.2.0     
## [67] purrr_0.3.2       km.ci_0.5-2       assertthat_0.2.0 
## [70] xfun_0.4          openxlsx_4.1.0    xtable_1.8-3     
## [73] broom_0.5.2       survey_3.36       coda_0.19-3      
## [76] tibble_2.1.3      KMsurv_0.1-5

Get data from CSV and define longitudinal data.frames

df <- read.csv("MS_brain_age_final_long.csv")
df$subtype <- factor(df$subtype, levels = c("control", "CIS", "RRMS", "SPMS", "PPMS")) # reorder subtype factor to put controls first
df$Cohort <- recode(df$Cohort, JR1 = "Imperial", C0 = "UCL0", C1 = "UCL1", C2 = "UCL2", C3 = "UCL3", C4 = "UCL4" ,C5 = "UCL5", C6 = "UCL6", C7 = "UCL7", A = "Amsterdam", B = "Barcelona", G = "Graz", M = "Milan", R = "Rome", S = "Siena")
df$wb_vol_ratio_icv <- df$WBV / df$ICV

Get data on Brain Age Healthy Training dataset

banc <- read.csv("/Users/jcole/Work/Brain ageing/BANC_2015/BANC_N2003_age_sex_Brain_age_predictions.csv")

Exclude participants with errors in the database & correct time since diagnosis errors

tmp <- df[df$Cohort == 'Amsterdam',] %>% group_by(PatientID) %>% dplyr::summarize(sd = sd(age_at_scan)) %>% arrange(desc(sd)) %>% filter(sd > 2)
excluded_IDs <- sort(tmp$PatientID)
# excluded_IDs <- unique(df[which(df$age_at_scan < df$age_at_baseline_scan1),1])
excluded_scans <- (df %>% filter(str_detect(PatientID, paste(excluded_IDs, collapse = "|"))))["ScanName"]
df <- df %>%
  filter(!str_detect(PatientID, paste(excluded_IDs, collapse = "|")))
rm(tmp)

There were 13 subjects with 38 scans excluded in total. Data entry errors in original spreadsheet; age at baseline not consistent within subject.

Load data for lesion filling analysis

## read in data
lesion_df <- read.csv(file = '~/Work/Brain ageing/Collaborations/MS/MAGNIMS/MAGNIMS20171224_final.csv')
lesion_df$subtype <- factor(lesion_df$subtype, levels = c("control", "CIS", "RRMS", "SPMS", "PPMS"))
## create ICV ratio variables
lesion_df <- lesion_df %>%
  mutate(ICV = GMV + WMV + CSFV) %>%
  mutate(gm_icv_ratio = GMV/ICV) %>%
  mutate(wm_icv_ratio = WMV/ICV)

Recode scanner status variable to field strength

Pre-post 2004 data only available for one site.

table(df$scanner_status)
## 
##           1.5T 1.5T_post_2004  1.5T_pre_2004             3T 
##           2257             19            447            842
df$field_strength <- recode(df$scanner_status, `1.5T_pre_2004` = "1.5T", `1.5T_post_2004` = "1.5T")
table(df$field_strength)
## 
## 1.5T   3T 
## 2723  842

Generate baseline only data.frame and show data frame structure

df.bl <- df[(df$age_at_baseline_scan1 == df$age_at_scan),] # define baseline data.frame
str(df)
## 'data.frame':    3565 obs. of  34 variables:
##  $ PatientID                         : Factor w/ 1367 levels "AMSTERDAM_4001",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ Cohort                            : Factor w/ 15 levels "Amsterdam","Barcelona",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ scanner_status                    : Factor w/ 4 levels "1.5T","1.5T_post_2004",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gender                            : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 1 1 1 1 ...
##  $ control0ppms1cisoforever2other3   : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ control0rest1                     : Factor w/ 2 levels "control","MS": 2 2 2 2 2 2 2 2 2 2 ...
##  $ control0ms1cis2                   : Factor w/ 3 levels "CIS","control",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ age_at_baseline_scan1             : num  57.8 57.8 57.8 44.4 44.4 ...
##  $ disease_duration_at_baseline_scan1: num  6.8 6.8 6.8 6.38 6.38 ...
##  $ DMT_YesNoNA                       : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 2 2 2 2 ...
##  $ DMTYes1                           : Factor w/ 2 levels "No treatment",..: 1 1 1 1 1 1 2 2 2 2 ...
##  $ EDSSbaseline                      : num  2.5 2.5 2.5 3 3 3 4 4 4 2.5 ...
##  $ NoScans                           : int  3 3 3 3 3 3 3 3 3 2 ...
##  $ FUTimeMax                         : num  2.32 2.32 2.32 2.35 2.35 ...
##  $ ScanName                          : Factor w/ 3603 levels "AMSTERDAM_4001_baseline_t1",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ age_at_scan                       : num  57.8 59.1 60.1 44.4 45.5 ...
##  $ EDSSatScan                        : num  2.5 3.5 3 3 2 2.5 4 2.5 4 2.5 ...
##  $ BrainAge                          : num  69.6 73.4 71 58.1 61.8 ...
##  $ BrainPAD                          : num  11.8 14.3 10.8 13.7 16.3 ...
##  $ subtype                           : Factor w/ 5 levels "control","CIS",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ disease_onset_age                 : num  51 51 51 38 38 ...
##  $ disease_duration                  : num  6.8 6.8 6.8 6.38 6.38 ...
##  $ interval                          : num  0 1.3 2.32 0 1.16 ...
##  $ scan_number                       : Factor w/ 10 levels "scan1","scan10",..: 1 3 4 1 3 4 1 3 4 1 ...
##  $ GM_vol                            : num  0.574 0.563 0.573 0.622 0.605 0.59 0.647 0.643 0.633 0.648 ...
##  $ WM_vol                            : num  0.536 0.532 0.52 0.43 0.423 0.413 0.404 0.399 0.387 0.599 ...
##  $ CSF_vol                           : num  0.352 0.349 0.367 0.293 0.309 0.333 0.355 0.358 0.386 0.271 ...
##  $ WBV                               : num  1.11 1.09 1.09 1.05 1.03 ...
##  $ ICV                               : num  1.46 1.44 1.46 1.34 1.34 ...
##  $ gm_vol_ratio_icv                  : num  0.393 0.39 0.392 0.462 0.453 ...
##  $ wm_vol_ratio_icv                  : num  0.367 0.368 0.356 0.32 0.316 ...
##  $ csf_vol_ratio_icv                 : num  0.241 0.242 0.251 0.218 0.231 ...
##  $ wb_vol_ratio_icv                  : num  0.759 0.758 0.749 0.782 0.769 ...
##  $ field_strength                    : Factor w/ 2 levels "1.5T","3T": 1 1 1 1 1 1 1 1 1 1 ...

Basic stats

Total number of subjects, and by group

The total number of subjects included was n = 1354

The total number of MS patients (including CIS) was n = 1204 and healthy controls was n = 150

Number of scans in total and per group

Total number of scans = 3565

Number of people with 2 or more scans

df.bl %>%
  filter(NoScans >= 2) %>%
  group_by(control0rest1) %>%
  tally()
## # A tibble: 2 x 2
##   control0rest1     n
##   <fct>         <int>
## 1 control         111
## 2 MS             1155

Number of people with 3 or more scans

df.bl %>%
  filter(NoScans >= 3) %>%
  group_by(control0rest1) %>%
  tally()
## # A tibble: 2 x 2
##   control0rest1     n
##   <fct>         <int>
## 1 control          64
## 2 MS              509

Generate demographics table using qwarps2

options(qwraps2_markup = "markdown", digits = 2)

table1 <-
  list("N" =
         list("Control" = ~ qwraps2::n_perc0(control0rest1 == "control", na_rm = T),
              "MS"  = ~ qwraps2::n_perc0(control0rest1 == "MS", na_rm = T)),
       "N with follow-up" = 
         list("Yes" = ~ sum(NoScans>1)),
     "Gender" =
       list("Female" = ~ qwraps2::n_perc0(gender == "female", show_symbol = T),
            "Male"  = ~ qwraps2::n_perc0(gender == "male", show_symbol = T)),
        "Number of scans" =
       list("min" = ~ min(NoScans),
            "max" = ~ max(NoScans),
            "mean (sd)" = ~ qwraps2::mean_sd(NoScans)),
       "Age at baseline scan (years)" =
       list("min" = ~ min(age_at_baseline_scan1),
            "max" = ~ max(age_at_baseline_scan1),
            "mean (sd)" = ~ qwraps2::mean_sd(age_at_baseline_scan1)),
       "Brain-predicted age at baseline scan (years)" =
       list("min" = ~ min(BrainAge),
            "max" = ~ max(BrainAge),
            "mean (sd)" = ~ qwraps2::mean_sd(BrainAge)),
       "Disease duration at baseline (years)" =
       list("min" = ~ min(disease_duration, na.rm = T),
            "max" = ~ max(disease_duration, na.rm = T),
            "mean (sd)" = ~ qwraps2::mean_sd(disease_duration, na_rm = T, show_n = "never")),
        "EDSS at baseline " =
       list("min" = ~ min(EDSSbaseline, na.rm = T),
            "max" = ~ max(EDSSbaseline, na.rm = T),
            "mean (sd)" = ~ qwraps2::mean_sd(EDSSbaseline, na_rm = T, show_n = "never")),
       "MS subtype" =
       list("CIS" = ~ qwraps2::n_perc0(subtype == "CIS", show_symbol = T),
            "RRMS"  = ~ qwraps2::n_perc0(subtype == "RRMS", show_symbol = T),
            "SPMS"  = ~ qwraps2::n_perc0(subtype == "SPMS", show_symbol = T),
            "PPMS"  = ~ qwraps2::n_perc0(subtype == "PPMS", show_symbol = T)),
       "Treatment" =
         list("Yes" = ~ qwraps2::n_perc0(DMT_YesNoNA == "YES", na_rm = T, show_symbol = T),
              "No"  = ~ qwraps2::n_perc0(DMT_YesNoNA == "NO", na_rm = T, show_symbol = T),
              "Missing" = ~ sum(is.na(DMT_YesNoNA)))
       )

print(summary_table(dplyr::group_by(df.bl, control0rest1), table1),
      rtitle = "Sample Chararcteristics",
      cnames = c("Controls", "MS patients"))
## 
## 
## |Sample Chararcteristics                          |Controls             |MS patients          |
## |:------------------------------------------------|:--------------------|:--------------------|
## |**N**                                            |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; Control                             |150 (100)            |0 (0)                |
## |&nbsp;&nbsp; MS                                  |0 (0)                |1,204 (100)          |
## |**N with follow-up**                             |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; Yes                                 |111                  |1155                 |
## |**Gender**                                       |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; Female                              |82 (55%)             |771 (64%)            |
## |&nbsp;&nbsp; Male                                |68 (45%)             |433 (36%)            |
## |**Number of scans**                              |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |1                    |1                    |
## |&nbsp;&nbsp; max                                 |10                   |7                    |
## |&nbsp;&nbsp; mean (sd)                           |2.82 &plusmn; 1.90   |2.61 &plusmn; 1.01   |
## |**Age at baseline scan (years)**                 |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |23                   |15                   |
## |&nbsp;&nbsp; max                                 |66                   |68                   |
## |&nbsp;&nbsp; mean (sd)                           |37.29 &plusmn; 9.96  |39.41 &plusmn; 10.76 |
## |**Brain-predicted age at baseline scan (years)** |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |14.5                 |7.4                  |
## |&nbsp;&nbsp; max                                 |70                   |92                   |
## |&nbsp;&nbsp; mean (sd)                           |38.43 &plusmn; 11.12 |50.27 &plusmn; 14.90 |
## |**Disease duration at baseline (years)**         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |Inf                  |0                    |
## |&nbsp;&nbsp; max                                 |-Inf                 |48                   |
## |&nbsp;&nbsp; mean (sd)                           |NaN &plusmn;  NA     |7.26 &plusmn; 7.96   |
## |**EDSS at baseline **                            |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |Inf                  |0                    |
## |&nbsp;&nbsp; max                                 |-Inf                 |9                    |
## |&nbsp;&nbsp; mean (sd)                           |NaN &plusmn;  NA     |2.60 &plusmn; 1.95   |
## |**MS subtype**                                   |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; CIS                                 |0 (0%)               |296 (25%)            |
## |&nbsp;&nbsp; RRMS                                |0 (0%)               |677 (56%)            |
## |&nbsp;&nbsp; SPMS                                |0 (0%)               |111 (9%)             |
## |&nbsp;&nbsp; PPMS                                |0 (0%)               |120 (10%)            |
## |**Treatment**                                    |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; Yes                                 |0 (NaN%)             |475 (41%)            |
## |&nbsp;&nbsp; No                                  |0 (NaN%)             |675 (59%)            |
## |&nbsp;&nbsp; Missing                             |150                  |54                   |

Length of follow-up

Get length of follow-up from longitudinal database.

options(digits = 3) ## return digits option to default
df %>%
  filter(NoScans >= 2) %>%
  group_by(PatientID) %>%
  slice(which.max(interval)) %>%
  # top_n(n = 1, wt = interval) %>%
  group_by(control0rest1) %>%
  dplyr::summarise(mean(interval), sd(interval), min(interval), max(interval))
## # A tibble: 2 x 5
##   control0rest1 `mean(interval)` `sd(interval)` `min(interval)`
##   <fct>                    <dbl>          <dbl>           <dbl>
## 1 control                   1.97           1.38           0.5  
## 2 MS                        3.41           3.15           0.233
## # … with 1 more variable: `max(interval)` <dbl>
options(digits = 7) ## return digits option to default

Demographics by MS subtype

options(qwraps2_markup = "markdown", digits = 2)

table1 <-
  list("N with follow-up" = 
         list("Yes" = ~ sum(NoScans>1)),
    "Gender" =
       list("Female" = ~ qwraps2::n_perc0(gender == "female", show_symbol = T),
            "Male"  = ~ qwraps2::n_perc0(gender == "male", show_symbol = T)),
        "Number of scans" =
       list("min" = ~ min(NoScans),
            "max" = ~ max(NoScans),
            "mean (sd)" = ~ qwraps2::mean_sd(NoScans)),
       "Age at baseline scan (years)" =
       list("min" = ~ min(age_at_baseline_scan1),
            "max" = ~ max(age_at_baseline_scan1),
            "mean (sd)" = ~ qwraps2::mean_sd(age_at_baseline_scan1)),
       "Brain-predicted age at baseline scan (years)" =
       list("min" = ~ min(BrainAge),
            "max" = ~ max(BrainAge),
            "mean (sd)" = ~ qwraps2::mean_sd(BrainAge)),
       "Disease duration at baseline (years)" =
       list("min" = ~ min(disease_duration, na.rm = T),
            "max" = ~ max(disease_duration, na.rm = T),
            "mean (sd)" = ~ qwraps2::mean_sd(disease_duration, na_rm = T, show_n = "never")),
        "EDSS at baseline " =
       list("min" = ~ min(EDSSbaseline, na.rm = T),
            "max" = ~ max(EDSSbaseline, na.rm = T),
            "mean (sd)" = ~ qwraps2::mean_sd(EDSSbaseline, na_rm = T, show_n = "never")),
       "Treatment" =
         list("Yes" = ~ qwraps2::n_perc0(DMT_YesNoNA == "YES", na_rm = T, show_symbol = T),
              "No"  = ~ qwraps2::n_perc0(DMT_YesNoNA == "NO", na_rm = T, show_symbol = T),
              "Missing" = ~ sum(is.na(DMT_YesNoNA)))
       )

print(summary_table(dplyr::group_by(df.bl, subtype), table1),
      rtitle = "Sample Chararcteristics",
      cnames = c("Controls", "CIS", "RRMS", "SPMS", "PPMS"))
## 
## 
## |Sample Chararcteristics                          |Controls             |CIS                  |RRMS                 |SPMS                 |PPMS                 |
## |:------------------------------------------------|:--------------------|:--------------------|:--------------------|:--------------------|:--------------------|
## |**N with follow-up**                             |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; Yes                                 |111                  |279                  |653                  |104                  |119                  |
## |**Gender**                                       |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; Female                              |82 (55%)             |199 (67%)            |453 (67%)            |67 (60%)             |52 (43%)             |
## |&nbsp;&nbsp; Male                                |68 (45%)             |97 (33%)             |224 (33%)            |44 (40%)             |68 (57%)             |
## |**Number of scans**                              |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |1                    |1                    |1                    |1                    |1                    |
## |&nbsp;&nbsp; max                                 |10                   |5                    |7                    |3                    |5                    |
## |&nbsp;&nbsp; mean (sd)                           |2.82 &plusmn; 1.90   |2.44 &plusmn; 0.98   |2.71 &plusmn; 1.05   |2.25 &plusmn; 0.56   |2.80 &plusmn; 1.07   |
## |**Age at baseline scan (years)**                 |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |23                   |15                   |18                   |30                   |19                   |
## |&nbsp;&nbsp; max                                 |66                   |55                   |66                   |68                   |65                   |
## |&nbsp;&nbsp; mean (sd)                           |37.29 &plusmn; 9.96  |33.01 &plusmn; 8.08  |38.83 &plusmn; 9.68  |50.11 &plusmn; 9.39  |48.55 &plusmn; 10.04 |
## |**Brain-predicted age at baseline scan (years)** |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |14.5                 |13.7                 |7.4                  |40.2                 |31.0                 |
## |&nbsp;&nbsp; max                                 |70                   |82                   |92                   |89                   |84                   |
## |&nbsp;&nbsp; mean (sd)                           |38.43 &plusmn; 11.12 |37.60 &plusmn; 10.01 |51.33 &plusmn; 13.32 |67.36 &plusmn; 10.42 |59.77 &plusmn; 10.90 |
## |**Disease duration at baseline (years)**         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |Inf                  |0.0                  |0.0                  |2.5                  |1.0                  |
## |&nbsp;&nbsp; max                                 |-Inf                 |18                   |42                   |48                   |27                   |
## |&nbsp;&nbsp; mean (sd)                           |NaN &plusmn;  NA     |0.52 &plusmn; 1.50   |7.67 &plusmn; 7.31   |17.44 &plusmn; 9.04  |6.65 &plusmn; 5.63   |
## |**EDSS at baseline **                            |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; min                                 |Inf                  |0                    |0                    |3                    |2                    |
## |&nbsp;&nbsp; max                                 |-Inf                 |4.5                  |6.5                  |9.0                  |8.0                  |
## |&nbsp;&nbsp; mean (sd)                           |NaN &plusmn;  NA     |1.36 &plusmn; 1.02   |2.12 &plusmn; 1.40   |5.83 &plusmn; 1.20   |5.10 &plusmn; 1.32   |
## |**Treatment**                                    |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |&nbsp;&nbsp;         |
## |&nbsp;&nbsp; Yes                                 |0 (NaN%)             |60 (20%)             |356 (56%)            |51 (50%)             |8 (7%)               |
## |&nbsp;&nbsp; No                                  |0 (NaN%)             |234 (80%)            |285 (44%)            |52 (50%)             |104 (93%)            |
## |&nbsp;&nbsp; Missing                             |150                  |2                    |36                   |8                    |8                    |

Length of follow-up

Get length of follow-up from longitudinal database.

options(digits = 3) ## return digits option to default
df %>%
  filter(NoScans >= 2) %>%
  group_by(PatientID) %>%
  slice(which.max(interval)) %>%
  # top_n(n = 1, wt = interval) %>%
  group_by(subtype) %>%
  dplyr::summarise(mean(interval), sd(interval), min(interval), max(interval))
## # A tibble: 5 x 5
##   subtype `mean(interval)` `sd(interval)` `min(interval)` `max(interval)`
##   <fct>              <dbl>          <dbl>           <dbl>           <dbl>
## 1 control             1.97           1.38           0.5              6   
## 2 CIS                 3.63           4.04           0.233           15   
## 3 RRMS                3.57           3.10           0.447           15   
## 4 SPMS                2.43           1.12           0.505            5.50
## 5 PPMS                2.86           1.65           0.833            6
options(digits = 7) ## return digits option to default

Correlations between demographics and clinical variables

Use corrplot package.

cor.mat <- cbind(df.bl$age_at_scan, df.bl$disease_onset_age, df.bl$disease_duration, df.bl$EDSSatScan)
colnames(cor.mat) <- c("Age at scan", "Age at diagnosis", "Time since diagnosis", "EDSS at scan")
corrplot(cor(cor.mat, use = "pairwise"), type = "upper", method = "color", addCoef.col = T, tl.col = "black", diag = F)

Age histograms

p <- ggplot() + xlab("Age (years)") + xlim(c(18,90)) + theme_cowplot()
banc_plot <- p + geom_histogram(aes(x = age), fill = "darkgoldenrod2", binwidth = 2, data = banc) + labs(title = "Training data", subtitle = "Healthy participants")
ms_plot <- p + geom_histogram(aes(x = age_at_scan), fill = "firebrick",  binwidth = 2, data = subset(df.bl, df.bl$control0rest1 == "MS"))  + labs(title = "Test data", subtitle = "MS and CIS patients")
hc_plot <- p + geom_histogram(aes(x = age_at_scan), fill = "darkgreen", binwidth = 2, data = subset(df.bl, df.bl$control0rest1 == "control"))  + labs(title = "Test data", subtitle = "Healthy controls")
p2 <- cowplot::plot_grid(banc_plot, ms_plot, hc_plot, labels = "AUTO", nrow = 1)
p2

cowplot::save_plot("~/Work/Articles/Brain age/MS/figures/BANC_MAGNIMS_age_histograms.pdf", p2, base_asp = 2.5)

Baseline brain-age analysis

describeBy(df.bl$BrainPAD, df.bl$control0rest1, mat = T, digits = 3) # brain-PAD by MS patient vs. controls
##     item  group1 vars    n   mean     sd median trimmed   mad     min
## X11    1 control    1  150  1.135  6.738  0.642   1.173 6.510 -16.507
## X12    2      MS    1 1204 10.875 10.237  9.443  10.246 9.684 -18.181
##        max  range  skew kurtosis    se
## X11 22.090 38.597 0.032    0.072 0.550
## X12 48.666 66.847 0.600    0.315 0.295

Evalute potential covariates

table(df.bl$Cohort, df.bl$field_strength)
##            
##             1.5T  3T
##   Amsterdam  193   0
##   Barcelona   62  90
##   UCL0         0  21
##   UCL1       148   6
##   UCL2         0  16
##   UCL3         0 147
##   UCL4         0  35
##   UCL5        50   0
##   UCL6        59   0
##   UCL7        34   0
##   Graz       174   0
##   Imperial     0  25
##   Milan       60  54
##   Rome        71   0
##   Siena      109   0
fit <- lm(BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + ICV + field_strength + Cohort, data = df.bl)
anova(fit)
## Analysis of Variance Table
## 
## Response: BrainPAD
##                                  Df Sum Sq Mean Sq F value    Pr(>F)    
## poly(age_at_baseline_scan1, 2)    2    445  222.73  2.3603  0.094788 .  
## gender                            1    741  741.38  7.8563  0.005138 ** 
## ICV                               1     12   11.93  0.1264  0.722278    
## field_strength                    1    693  692.58  7.3392  0.006833 ** 
## Cohort                           14  17705 1264.64 13.4013 < 2.2e-16 ***
## Residuals                      1334 125886   94.37                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Hierarchical partitioning of brain-PAD

a <- lm(BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + wb_vol_ratio_icv +  field_strength + Cohort, data = df.bl)
summary(a)
## 
## Call:
## lm(formula = BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + 
##     wb_vol_ratio_icv + field_strength + Cohort, data = df.bl)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.641  -4.486  -0.235   4.181  32.055 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      142.6576     3.7241  38.307  < 2e-16 ***
## poly(age_at_baseline_scan1, 2)1 -164.4998     8.7462 -18.808  < 2e-16 ***
## poly(age_at_baseline_scan1, 2)2  -11.9125     7.0751  -1.684 0.092471 .  
## gendermale                        -1.1586     0.4034  -2.872 0.004142 ** 
## wb_vol_ratio_icv                -164.4643     4.5651 -36.027  < 2e-16 ***
## field_strength3T                  -1.7440     0.8259  -2.112 0.034907 *  
## CohortBarcelona                   -3.6913     0.9170  -4.025 6.01e-05 ***
## CohortUCL0                         0.7741     1.8032   0.429 0.667782    
## CohortUCL1                        -2.1112     0.7838  -2.694 0.007157 ** 
## CohortUCL2                        -0.8549     2.0068  -0.426 0.670182    
## CohortUCL3                        -3.0754     1.1239  -2.736 0.006297 ** 
## CohortUCL4                        -5.4675     1.5208  -3.595 0.000336 ***
## CohortUCL5                        -1.7564     1.1165  -1.573 0.115914    
## CohortUCL6                        -1.7447     1.0339  -1.688 0.091731 .  
## CohortUCL7                        -7.3669     1.3185  -5.587 2.79e-08 ***
## CohortGraz                        -4.9941     0.7700  -6.486 1.24e-10 ***
## CohortImperial                    -5.4688     1.6925  -3.231 0.001263 ** 
## CohortMilan                        4.6601     0.9195   5.068 4.58e-07 ***
## CohortRome                        -0.6036     0.9675  -0.624 0.532844    
## CohortSiena                        4.1644     0.8522   4.887 1.15e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.916 on 1334 degrees of freedom
## Multiple R-squared:  0.5614, Adjusted R-squared:  0.5552 
## F-statistic: 89.87 on 19 and 1334 DF,  p-value: < 2.2e-16
h.p <- hier.part(y = df.bl$BrainPAD, 
                 xcan = df.bl[c("age_at_baseline_scan1","gender", "wb_vol_ratio_icv", "field_strength", "Cohort")], 
                 gof = "Rsqu",
                 barplot = FALSE)
round(h.p$IJ, 3)
##                           I      J Total
## age_at_baseline_scan1 0.056 -0.056 0.000
## gender                0.004  0.001 0.005
## wb_vol_ratio_icv      0.389 -0.044 0.345
## field_strength        0.006 -0.002 0.004
## Cohort                0.105  0.011 0.116
round(h.p$I.perc, 1)
##                          I
## age_at_baseline_scan1 10.0
## gender                 0.7
## wb_vol_ratio_icv      69.3
## field_strength         1.1
## Cohort                18.8
bar.colors <- c("darkgoldenrod2", "grey")
barplot(t(as.matrix(h.p$IJ[,1:2])),
        names.arg = c("Age", "Sex", "NBV", "Field strength", "Cohort"),
        col = bar.colors,
         ylim = c(0,0.4),
        xlab = "Model predictors",
        ylab = expression(paste("Brain-PAD variance explained R"^"2")))
legend("topright", c("Unique variance", "Shared variance"), fill = bar.colors, bty = "n", title = "Legend")

Results suggest that age, age^2, normalised brain volume (AKA wb_vol_ratio_icv), gender, Cohort and field strength are appropriate covariates. # Predict brain-PAD based on group Function to run a linear mixed effect (LME) model adjusting for: fixed effects of age, age^2, gender and field strength; random effects of cohort.

run_lm <- function(var, data) {
  m1 <- lmer(BrainPAD ~ data[[var]] +
               poly(age_at_scan, 2) +
               gender +
               field_strength +
               wb_vol_ratio_icv +
               (1|Cohort),
             data = data,
             control = lmerControl(optimizer = "Nelder_Mead"))
  return(m1)
}

Main effect of group (MS vs. controls)

fit <- run_lm("control0rest1", df.bl)
summary(fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 9034.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.1381 -0.6419 -0.0329  0.6186  4.7265 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept)  8.643   2.940   
##  Residual             45.856   6.772   
## Number of obs: 1354, groups:  Cohort, 15
## 
## Fixed effects:
##                        Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            129.6722     3.9073 1183.7942  33.187  < 2e-16 ***
## data[[var]]MS            6.0432     0.7698 1265.8978   7.850 8.82e-15 ***
## poly(age_at_scan, 2)1 -166.3679     8.5264 1347.0000 -19.512  < 2e-16 ***
## poly(age_at_scan, 2)2  -15.3559     6.9240 1342.9439  -2.218   0.0267 *  
## gendermale              -0.9364     0.3956 1338.6090  -2.367   0.0181 *  
## field_strength3T        -1.8549     0.7352  408.4067  -2.523   0.0120 *  
## wb_vol_ratio_icv      -156.8503     4.5515 1346.9993 -34.461  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) d[[]]M p(__,2)1 p(__,2)2 gndrml fld_3T
## dat[[vr]]MS -0.366                                       
## ply(g__,2)1 -0.430 -0.040                                
## ply(g__,2)2  0.049 -0.063  0.010                         
## gendermale  -0.240  0.070  0.121    0.049                
## fld_strng3T -0.026  0.012 -0.009   -0.025   -0.051       
## wb_vl_rt_cv -0.961  0.212  0.466   -0.042    0.209 -0.051

Estimated marginal means

Generate EMMs for all MS/CIS and healthy controls. Need to re-fit same model as above so that lme4 object can be read by emmeans.

fit <- lmer(BrainPAD ~ control0rest1 +
               poly(age_at_scan, 2) +
               gender +
               field_strength +
              wb_vol_ratio_icv +
               (1|Cohort),
             data = df.bl,
             control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit, pairwise ~ control0rest1)
## $emmeans
##  control0rest1 emmean   SE   df lower.CL upper.CL
##  control         4.25 1.04 36.8     2.14     6.36
##  MS             10.29 0.83 15.8     8.53    12.05
## 
## Results are averaged over the levels of: gender, field_strength 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast     estimate    SE   df t.ratio p.value
##  control - MS    -6.04 0.774 1261 -7.810  <.0001 
## 
## Results are averaged over the levels of: gender, field_strength 
## Degrees-of-freedom method: kenward-roger

Meta-analysis looking at all the separate cohorts with MS/CIS patients and controls

Check which cohorts contain healthy controls and patients.

table(df.bl$subtype, df.bl$Cohort)
##          
##           Amsterdam Barcelona UCL0 UCL1 UCL2 UCL3 UCL4 UCL5 UCL6 UCL7 Graz
##   control         0         0    0    0    0   82   16   17   17   10    0
##   CIS             4        84    0   75    0    0    0    0    0   24   95
##   RRMS          131        63   21   77    0   28    0   33    0    0   69
##   SPMS           36         5    0    2    7   23    0    0    0    0    9
##   PPMS           22         0    0    0    9   14   19    0   42    0    1
##          
##           Imperial Milan Rome Siena
##   control        8     0    0     0
##   CIS            0    11    1     2
##   RRMS          16    66   66   107
##   SPMS           1    24    4     0
##   PPMS           0    13    0     0

Create data.frame with summary data appropriate for meta-analysis.

tmp0 <- df.bl %>% group_by(Cohort, control0rest1) %>% dplyr::summarise(length(control0rest1))
meta.cohorts <- as.list(tmp0[tmp0$control0rest1 == "control",1])$Cohort
tmp <- df.bl %>%
  filter(str_detect(Cohort, paste(meta.cohorts, collapse = "|")))
tmp1 <- tmp %>% group_by(Cohort, control0rest1) %>% dplyr::summarise(n = n(), Mean = mean(BrainPAD), SD  =  sd(BrainPAD))
tmp2 <- dcast(tmp1, Cohort ~ control0rest1, value.var = "n")
tmp3 <- dcast(tmp1, Cohort ~ control0rest1, value.var = "Mean")
tmp4 <- dcast(tmp1, Cohort ~ control0rest1, value.var = "SD")
names(tmp2) <- c("Cohort", "control_n", "MS_n")
names(tmp3) <- c("Cohort", "control_mean", "MS_mean")
names(tmp4) <- c("Cohort", "control_sd", "MS_sd")
tmp1x <- tmp %>% group_by(Cohort) %>% dplyr::summarise(pooled.SD  =  sd(BrainPAD))
meta.df <- join_all(list(tmp2,tmp3,tmp4, tmp1x), by = 'Cohort', type = 'left')
rm(list = ls(pattern = 'tmp*')) # remove temporary data frames

Fit a random-effects meta-analysis using REML

Using the metafor package.

meta.df <- escalc(m1i = control_mean, sd1i = pooled.SD, n1i = control_n, m2i = MS_mean, sd2i = pooled.SD, n2i = MS_n, measure = "MD", data = meta.df, digits = 2)
meta.results <- rma(yi, vi, data = meta.df, method = "REML")
print(meta.results)
## 
## Random-Effects Model (k = 6; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 11.8177 (SE = 13.0982)
## tau (square root of estimated tau^2 value):      3.4377
## I^2 (total heterogeneity / total variability):   59.03%
## H^2 (total variability / sampling variability):  2.44
## 
## Test for Heterogeneity: 
## Q(df = 5) = 13.4832, p-val = 0.0192
## 
## Model Results:
## 
## estimate      se     zval    pval     ci.lb    ci.ub     
##  -9.4543  1.8649  -5.0695  <.0001  -13.1095  -5.7990  ***
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(meta.results)
## 
##        estimate  ci.lb   ci.ub
## tau^2   11.8177 0.2530 84.1148
## tau      3.4377 0.5030  9.1714
## I^2(%)  59.0288 2.9923 91.1149
## H^2      2.4407 1.0308 11.2548

Forest plot of results

plot.forest %<a-% {
forest(meta.results, ilab = cbind(meta.df$MS_n, meta.df$control_n), ilab.xpos = c(-30,-23), slab = meta.df$Cohort, digits = 1, xlab = "MS vs. Healthy control group mean difference", steps = 6, col = "red", cex = 1.25, pch = 22, bg = "blue"); text(c(-40, -30, -23), 7.6, c("Cohort", "MS n", "HC n"), font = 2, cex = 1.25)
}
plot.forest

cairo_pdf("plots/forest_plot.pdf", 6,5)
plot.forest
dev.off()
## quartz_off_screen 
##                 2

Linear regression analysis in cohort UCL3

Includes covariates: age, age^2, gender.

summary(lm(BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 2) + gender + wb_vol_ratio_icv, data = subset(df.bl, df.bl$Cohort == "UCL3")))
## 
## Call:
## lm(formula = BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 
##     2) + gender + wb_vol_ratio_icv, data = subset(df.bl, df.bl$Cohort == 
##     "UCL3"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.7410  -3.8178   0.1203   4.0826  14.3160 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       98.7584     9.9120   9.963  < 2e-16 ***
## control0rest1MS                    9.2958     1.3312   6.983 1.04e-10 ***
## poly(age_at_baseline_scan1, 2)1  -46.2627     7.5736  -6.108 9.27e-09 ***
## poly(age_at_baseline_scan1, 2)2    0.2071     6.3500   0.033    0.974    
## gendermale                        -0.7397     1.1030  -0.671    0.504    
## wb_vol_ratio_icv                -121.3744    12.0628 -10.062  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.3 on 141 degrees of freedom
## Multiple R-squared:  0.6724, Adjusted R-squared:  0.6608 
## F-statistic: 57.89 on 5 and 141 DF,  p-value: < 2.2e-16

Brain-PAD by MS subtype

Using a linear mixed effect (LME) model, to compare subtypes. This analysis excluded controls. Adjusting for age, age^2, gender, field strength, normalised brain volume and cohort.

subtypes <- run_lm("subtype", subset(df.bl, df.bl$subtype != "control"))
summary(subtypes)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 7969.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8602 -0.6520 -0.0126  0.6072  4.5227 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept)  5.353   2.314   
##  Residual             43.862   6.623   
## Number of obs: 1204, groups:  Cohort, 15
## 
## Fixed effects:
##                        Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            126.1933     3.9556 1133.0602  31.902  < 2e-16 ***
## data[[var]]RRMS          5.1218     0.5834  927.4206   8.779  < 2e-16 ***
## data[[var]]SPMS          6.5847     0.9381 1127.8282   7.019 3.85e-12 ***
## data[[var]]PPMS          4.5069     1.0695  370.4821   4.214 3.15e-05 ***
## poly(age_at_scan, 2)1 -172.6828     8.6976 1193.8555 -19.854  < 2e-16 ***
## poly(age_at_scan, 2)2  -16.5325     6.8839 1189.0349  -2.402   0.0165 *  
## gendermale              -0.7640     0.4149 1188.4981  -1.841   0.0658 .  
## field_strength3T        -0.6255     0.7130  230.2779  -0.877   0.3813    
## wb_vol_ratio_icv      -150.8232     4.7561 1194.8731 -31.711  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) d[[]]R d[[]]S d[[]]P p(__,2)1 p(__,2)2 gndrml fld_3T
## dt[[v]]RRMS -0.282                                                     
## dt[[v]]SPMS -0.334  0.560                                              
## dt[[v]]PPMS -0.194  0.498  0.478                                       
## ply(g__,2)1 -0.365 -0.063 -0.182 -0.212                                
## ply(g__,2)2  0.056  0.051 -0.108 -0.096  0.055                         
## gendermale  -0.218  0.010 -0.007 -0.104  0.129    0.059                
## fld_strng3T -0.073  0.202  0.058  0.120 -0.021   -0.010   -0.053       
## wb_vl_rt_cv -0.974  0.162  0.258  0.098  0.398   -0.060    0.197 -0.016
anova(subtypes)
## Type III Analysis of Variance Table with Satterthwaite's method
##                      Sum Sq Mean Sq NumDF   DenDF   F value  Pr(>F)    
## data[[var]]            3701    1234     3  773.45   28.1258 < 2e-16 ***
## poly(age_at_scan, 2)  17365    8683     2 1191.15  197.9519 < 2e-16 ***
## gender                  149     149     1 1188.50    3.3909 0.06581 .  
## field_strength           34      34     1  230.28    0.7696 0.38126    
## wb_vol_ratio_icv      44108   44108     1 1194.87 1005.6082 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Brain-PAD estimated marginal means for subtypes

Generate EMMs for all MS subtypes.

fit_subtype <- lmer(BrainPAD ~ subtype +
               poly(age_at_scan, 2) +
               gender +
               field_strength +
              wb_vol_ratio_icv +
               (1|Cohort),
            # data = subset(df.bl, df.bl$control0rest1 != "control"),
            data = df.bl,
             control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit_subtype, pairwise ~ subtype)
## $emmeans
##  subtype emmean    SE   df lower.CL upper.CL
##  control   4.27 0.901 49.8     2.47     6.08
##  CIS       6.43 0.802 32.7     4.79     8.06
##  RRMS     11.57 0.691 19.7    10.13    13.02
##  SPMS     13.02 0.955 71.9    11.11    14.92
##  PPMS     10.44 0.974 63.8     8.49    12.38
## 
## Results are averaged over the levels of: gender, field_strength 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast       estimate    SE   df t.ratio p.value
##  control - CIS     -2.15 0.916  724 -2.350  0.1305 
##  control - RRMS    -7.30 0.811  868 -9.001  <.0001 
##  control - SPMS    -8.74 1.013 1227 -8.631  <.0001 
##  control - PPMS    -6.16 0.952 1296 -6.474  <.0001 
##  CIS - RRMS        -5.15 0.575 1100 -8.949  <.0001 
##  CIS - SPMS        -6.59 0.916 1295 -7.194  <.0001 
##  CIS - PPMS        -4.01 1.011  659 -3.966  0.0008 
##  RRMS - SPMS       -1.44 0.764 1342 -1.891  0.3227 
##  RRMS - PPMS        1.14 0.871  700  1.303  0.6893 
##  SPMS - PPMS        2.58 0.986 1110  2.618  0.0678 
## 
## Results are averaged over the levels of: gender, field_strength 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 5 estimates

Save table for importing into Word

x <- as.data.frame(emmeans(object = fit_subtype, pairwise ~ subtype)$contrasts)
write.table(format(x, digits = 3), file = "~/Work/Articles/Brain age/MS/Annals Neurology/pairwise_brain_PAD.txt", sep = ",", quote = F, row.names = F)

Plot the EMMs and confidence intervals

plot_model(fit_subtype, type = "pred", terms = c("subtype")) +
  theme_cowplot()

Brain-PAD boxplot by MS subtype

# calculate Ns
control_n <- with(df.bl, table(subtype))["control"]
cis_n <- with(df.bl, table(subtype))["CIS"]
rrms_n <- with(df.bl, table(subtype))["RRMS"]
spms_n <- with(df.bl, table(subtype))["SPMS"]
ppms_n <- with(df.bl, table(subtype))["PPMS"]

plot.pryr %<a-% {
with(df.bl, ehplot(BrainPAD, groups = subtype, ylim = c(-20,52), pch = 21, bg = ms.palette[as.numeric(subtype)], box = T, offset = 0.1, intervals = 50, xlab = "Groups", ylab = "Brain-PAD (years)", main = "Baseline Brain-PAD by study group")) + 
  abline(0,0, lty = 2) +
  text(1,51, paste("N = ", control_n, sep = ""), cex = 0.8) +
  text(2,51, paste("N = ", cis_n, sep = ""), cex = 0.8) +
  text(3,51, paste("N = ", rrms_n, sep = ""), cex = 0.8) +
  text(4,51, paste("N = ", spms_n, sep = ""), cex = 0.8) +
  text(5,51, paste("N = ", ppms_n, sep = ""), cex = 0.8)
}
plot.pryr

## integer(0)
cairo_pdf("plots/MS_UCL_MAGNIMS_combined_group_Brain-PAD.pdf", 8,8)
plot.pryr
## integer(0)
dev.off()
## quartz_off_screen 
##                 2

Brain-PAD boxplot by cohort and by MS subtype

Also, Estimate marginal means plot

Uses sjPlot to calculated EMMs from linear regression model

p1 <- ggplot(df.bl, aes(x = subtype, y = BrainPAD, fill = subtype)) +
  geom_boxplot() +
  facet_wrap(df.bl$Cohort, nrow = 1) +
  geom_hline(aes(yintercept = 0), lty = 2) +
  theme_cowplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(legend.position = "none") +
  scale_fill_manual(values = ms.palette)

fit <- lm(BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 2) + gender + field_strength + wb_vol_ratio_icv + Cohort, data = df.bl)
p2 <- plot_model(fit, type = "emm", terms = c("Cohort", "control0rest1"), colors = c("darkgreen", "firebrick"), show.legend = F) + theme_cowplot()

plot_grid(list(p1,p2))

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/brainPAD_cohort_subtype_boxplot_EMM_plot.pdf", width = 15, height = 10, useDingbats = FALSE)

Brain-PAD boxplot by MS subtype in cohort UCL3 only

# calculate Ns
C3.bl <- df.bl %>% filter(Cohort == "UCL3") 
C3.bl$subtype <- factor(C3.bl$subtype)
control_n <- with(C3.bl, table(subtype))["control"]
rrms_n <- with(C3.bl, table(subtype))["RRMS"]
spms_n <- with(C3.bl, table(subtype))["SPMS"]
ppms_n <- with(C3.bl, table(subtype))["PPMS"]

plot.pryr %<a-% {
  with(C3.bl, ehplot(BrainPAD, groups = subtype, ylim = c(-20,52), pch = 21, bg = ms.palette[-2][as.numeric(subtype)], box = T, offset = 0.1, intervals = 50, xlab = "Groups", ylab = "Brain-PAD (years)", main = "Baseline Brain-PAD by study group")) + 
  abline(0,0, lty = 2) +
  text(1,51, paste("N = ", control_n, sep = ""), cex = 0.8) +
  text(2,51, paste("N = ", rrms_n, sep = ""), cex = 0.8) +
  text(3,51, paste("N = ", spms_n, sep = ""), cex = 0.8) +
  text(4,51, paste("N = ", ppms_n, sep = ""), cex = 0.8)
}
plot.pryr

## integer(0)
cairo_pdf("plots/MS_cohort_C3_Brain-PAD.pdf", 8,8)
plot.pryr
## integer(0)
dev.off()
## quartz_off_screen 
##                 2

Cohort UCL3 statistics

Use OLS regression, no random effects necessary.

fit_UCL3 <- lm(BrainPAD ~ subtype +
                 poly(age_at_scan, 2) +
                 gender +
                 wb_vol_ratio_icv,
               data = subset(df.bl, df.bl$Cohort == "UCL3"))
anova(fit_UCL3)
## Analysis of Variance Table
## 
## Response: BrainPAD
##                       Df Sum Sq Mean Sq F value  Pr(>F)    
## subtype                3 7250.8  2416.9 60.4945 < 2e-16 ***
## poly(age_at_scan, 2)   2  492.7   246.3  6.1655 0.00272 ** 
## gender                 1  146.8   146.8  3.6745 0.05730 .  
## wb_vol_ratio_icv       1 3638.6  3638.6 91.0720 < 2e-16 ***
## Residuals            139 5553.5    40.0                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(object = fit_UCL3, pairwise ~ subtype)
## $emmeans
##  subtype emmean   SE  df lower.CL upper.CL
##  control   3.04 1.00 139     1.07     5.02
##  RRMS     11.68 1.34 139     9.04    14.32
##  SPMS     13.66 1.70 139    10.29    17.02
##  PPMS     12.75 1.82 139     9.14    16.35
## 
## Results are averaged over the levels of: gender 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast       estimate   SE  df t.ratio p.value
##  control - RRMS   -8.638 1.55 139 -5.579  <.0001 
##  control - SPMS  -10.611 1.95 139 -5.455  <.0001 
##  control - PPMS   -9.702 2.00 139 -4.859  <.0001 
##  RRMS - SPMS      -1.974 1.93 139 -1.022  0.7371 
##  RRMS - PPMS      -1.064 2.15 139 -0.496  0.9599 
##  SPMS - PPMS       0.909 2.21 139  0.412  0.9764 
## 
## Results are averaged over the levels of: gender 
## P value adjustment: tukey method for comparing a family of 4 estimates

Brain-PAD by subtype descriptive statistics

with(df.bl, describeBy(BrainPAD, subtype, mat = T, digits = 1)) # brain-PAD by MS patient subtypes and controls
##     item  group1 vars   n mean   sd median trimmed  mad   min  max range
## X11    1 control    1 150  1.1  6.7    0.6     1.2  6.5 -16.5 22.1  38.6
## X12    2     CIS    1 296  4.6  7.2    4.3     4.3  6.5 -13.1 31.5  44.5
## X13    3    RRMS    1 677 12.5 10.0   11.1    12.0  9.4 -18.2 48.7  66.8
## X14    4    SPMS    1 111 17.3 10.5   16.9    17.1 11.1  -6.7 43.9  50.6
## X15    5    PPMS    1 120 11.2 10.3   10.3    10.7 10.6  -5.3 46.4  51.7
##     skew kurtosis  se
## X11  0.0      0.1 0.6
## X12  0.5      0.9 0.4
## X13  0.5      0.3 0.4
## X14  0.2     -0.4 1.0
## X15  0.5      0.1 0.9

Lesion filling

To establish whether using the FSL lesion filling software influences brain-predicted age values. This analysis was conducted only in UCL patients (n = 575).

with(subset(lesion_df, lesion_df$subtype != "control"), describeBy(filled_brain_age, subtype))
## 
##  Descriptive statistics by group 
## group: control
## NULL
## -------------------------------------------------------- 
## group: CIS
##    vars n mean   sd median trimmed  mad  min   max range  skew kurtosis
## X1    1 8 42.4 6.18  42.57    42.4 7.87 34.3 49.69 15.39 -0.09    -1.98
##      se
## X1 2.18
## -------------------------------------------------------- 
## group: RRMS
##    vars   n  mean    sd median trimmed   mad   min   max range  skew
## X1    1 382 54.78 11.46  54.79   54.78 12.12 24.26 87.53 63.27 -0.03
##    kurtosis   se
## X1    -0.44 0.59
## -------------------------------------------------------- 
## group: SPMS
##    vars   n  mean   sd median trimmed  mad   min   max range  skew
## X1    1 119 64.62 9.24  65.96   65.41 8.45 40.33 81.64 41.31 -0.74
##    kurtosis   se
## X1    -0.03 0.85
## -------------------------------------------------------- 
## group: PPMS
##    vars  n mean    sd median trimmed   mad  min   max range skew kurtosis
## X1    1 66 62.2 10.13  59.63   61.67 10.57 44.4 84.13 39.72 0.46    -0.61
##      se
## X1 1.25

Correlation between brain-predicted age from filled and unfilled images: Pearson’s r = 0.994. Median absolute error (MAE) between brain-predicted age from filled and unfilled images = 0.3717 years. Mean difference between brain-predicted age from filled and unfilled images = 0.28 ± 1.29 years.

Lesion filled vs. unfilled scatterplot

ggplot(data = subset(lesion_df, lesion_df$subtype != "control"), aes(x = brain_age, y = filled_brain_age)) +
  geom_abline(slope = 1) +
  geom_point(pch = 21, aes(fill = subtype), size = 2) +
  labs(x = "Unfilled brain-predicted age (years)", y = "Lesion-filled brain-predicted age (years)") +
  xlim(c(20,90)) +
  scale_fill_manual(values = ms.palette[-1]) +
  theme_cowplot() + theme(legend.position = c(0.8, 0.2))
## Warning: Removed 1441 rows containing missing values (geom_point).

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/lesion_filling_brain_age_plot.pdf", width = 5, height = 5, useDingbats = FALSE)
## Warning: Removed 1441 rows containing missing values (geom_point).

Lesion filled vs. unfilled Bland-Altman plot

mean.diff <- mean(lesion_df$brain_age - lesion_df$filled_brain_age, na.rm = T)
sd.diff <- sd(lesion_df$brain_age - lesion_df$filled_brain_age, na.rm = T)
ggplot(data = subset(lesion_df, lesion_df$subtype != "control"), aes(x = ((brain_age + filled_brain_age)/2), y = brain_age - filled_brain_age)) +
  geom_abline(slope = 0, lty = 2) +
  geom_point(pch = 21, aes(fill = subtype), size = 2) +
  geom_hline(yintercept = mean.diff, color = "darkgoldenrod1", lwd = 1) + # mean difference line
  geom_hline(yintercept = mean.diff + 1.96*sd.diff, color = "darkgoldenrod2", lty = 2) + # upper 95% line
  geom_hline(yintercept = mean.diff - 1.96*sd.diff, color = "darkgoldenrod2", lty = 2) + # lower 95% line
  # geom_smooth(method = "lm", level = 0.95, color = "black", lwd = 0.3) +
  labs(x = "Mean of filled/unfilled brain-predicted age (years)", y = "Unfilled - Lesion-filled brain-predicted age (years)") +
  # ylim(c(-20,20)) +
  scale_fill_manual(values = ms.palette[-1]) +
  theme_cowplot() + theme(legend.position = c(0.1, 0.8)) +
  annotate("text", x = 25, y = mean.diff + 1.96*sd.diff + 0.5, label = "+1.96*SD", color = "darkgoldenrod2") +
  annotate("text", x = 25, y = mean.diff - 1.96*sd.diff - 0.5, label = "-1.96*SD", color = "darkgoldenrod2") +
  annotate("text", x = 27.5, y = mean.diff + 0.8, label = "Mean difference", color = "darkgoldenrod2")
## Warning: Removed 1441 rows containing missing values (geom_point).

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/lesion_filling_brain_age_BA_plot.pdf", width = 8, height = 5, useDingbats = FALSE)
## Warning: Removed 1441 rows containing missing values (geom_point).

Correlates of brain-PAD at baseline

EDSS score, an index of disability

LME model accounting for fixed effects of age at baseline, age^2, gender, field strength, normalised brain volume and random effects of Cohort.

summary(run_lm("EDSSbaseline", df.bl))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 7813.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9891 -0.6338 -0.0466  0.6106  4.8192 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept)  8.433   2.904   
##  Residual             45.055   6.712   
## Number of obs: 1174, groups:  Cohort, 14
## 
## Fixed effects:
##                        Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            133.8648     4.1224 1061.6482  32.473  < 2e-16 ***
## data[[var]]              0.6369     0.1416 1116.0234   4.497 7.61e-06 ***
## poly(age_at_scan, 2)1 -186.5305     9.3459 1165.0388 -19.959  < 2e-16 ***
## poly(age_at_scan, 2)2  -25.6213     7.3294 1160.9933  -3.496 0.000491 ***
## gendermale              -0.9577     0.4216 1159.8982  -2.272 0.023284 *  
## field_strength3T        -1.7902     0.7337  396.6175  -2.440 0.015124 *  
## wb_vol_ratio_icv      -156.4270     4.9237 1165.1857 -31.770  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.374                                       
## ply(g__,2)1 -0.333 -0.224                                
## ply(g__,2)2  0.082 -0.127  0.052                         
## gendermale  -0.221  0.007  0.108    0.049                
## fld_strng3T -0.035  0.031 -0.012   -0.028   -0.051       
## wb_vl_rt_cv -0.972  0.290  0.366   -0.076    0.196 -0.041

When predicting brain-PAD in a LME model, the effect of EDSS at baseline beta = 0.64, 95% CI = 0.36, 0.91, p = 7.605e-06.

Testing for a polynomial fit of EDSS baseline

fit.edss.poly <- lmer(BrainPAD ~ 
                        poly(EDSSbaseline,2) +
                        poly(age_at_scan, 2) +
                        gender +
                        field_strength +
                        wb_vol_ratio_icv +
                        (1|Cohort),
                      data = subset(df.bl, !is.na(df.bl$EDSSbaseline)),
                      control = lmerControl(optimizer = "Nelder_Mead"))
summary(fit.edss.poly)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ poly(EDSSbaseline, 2) + poly(age_at_scan, 2) + gender +  
##     field_strength + wb_vol_ratio_icv + (1 | Cohort)
##    Data: subset(df.bl, !is.na(df.bl$EDSSbaseline))
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 7796.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9258 -0.6284 -0.0525  0.6069  4.8209 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept)  8.422   2.902   
##  Residual             44.993   6.708   
## Number of obs: 1174, groups:  Cohort, 14
## 
## Fixed effects:
##                         Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)             135.3383     3.9934 1050.1573  33.890  < 2e-16 ***
## poly(EDSSbaseline, 2)1   43.3875     9.4512 1113.9207   4.591 4.92e-06 ***
## poly(EDSSbaseline, 2)2  -11.3812     7.0429 1165.1705  -1.616 0.106366    
## poly(age_at_scan, 2)1  -175.1301     8.7644 1164.1102 -19.982  < 2e-16 ***
## poly(age_at_scan, 2)2   -23.2697     6.9159 1159.2976  -3.365 0.000791 ***
## gendermale               -0.9541     0.4213 1158.8600  -2.265 0.023711 *  
## field_strength3T         -1.7994     0.7332  395.4624  -2.454 0.014548 *  
## wb_vol_ratio_icv       -156.3987     4.9203 1164.1857 -31.786  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) p(EDSS,2)1 p(EDSS,2)2 p(__,2)1 p(__,2)2 gndrml fld_3T
## pl(EDSS,2)1 -0.294                                                      
## pl(EDSS,2)2  0.002 -0.061                                               
## ply(g__,2)1 -0.363 -0.224      0.026                                    
## ply(g__,2)2  0.073 -0.122     -0.076      0.040                         
## gendermale  -0.228  0.007     -0.005      0.108    0.050                
## fld_strng3T -0.033  0.030      0.008     -0.012   -0.029   -0.051       
## wb_vl_rt_cv -0.975  0.289     -0.004      0.367   -0.076    0.196 -0.042

Non-parametric correlation between brain-PAD and EDSS at baseline

with(df.bl, cor.test(BrainPAD, EDSSbaseline, method = "spearman"))
## Warning in cor.test.default(BrainPAD, EDSSbaseline, method = "spearman"):
## Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  BrainPAD and EDSSbaseline
## S = 200790000, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2554724

Plot EDSS histogram by subtype

ggplot(subset(df.bl, !is.na(df.bl$EDSSbaseline)), aes(EDSSbaseline, fill = subtype)) +
  scale_fill_manual(values = ms.palette[-1]) +
  geom_histogram(bins = 20) +
  facet_wrap(~ subtype) +
  theme_cowplot()

Test for interaction between subtype and EDSS on brain-PAD

fit.edss <- lmer(BrainPAD ~ EDSSbaseline * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control"))
round(as.matrix(anova(fit.edss)["EDSSbaseline:subtype",]),3)
##                       Sum Sq Mean Sq NumDF    DenDF F value Pr(>F)
## EDSSbaseline:subtype 138.445  46.148     3 1157.787   1.079  0.357

Use simple slopes from jtools to extract adjusted slopes for each subtype. Need to fit model without age^2 as poly() is incompatible with sim_slopes().

fit.edss2 <- lmer(BrainPAD ~ EDSSbaseline * subtype + age_at_baseline_scan1 + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control"))
sim_slopes(fit.edss2, pred = "EDSSbaseline", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS 
## 
## Slope of EDSSbaseline when subtype = CIS: 
##  Est. S.E.   df    p
##  0.15 0.40 0.37 0.71
## 
## Slope of EDSSbaseline when subtype = RRMS: 
##  Est. S.E.   df    p
##  0.79 0.21 3.78 0.00
## 
## Slope of EDSSbaseline when subtype = SPMS: 
##  Est. S.E.   df    p
##  0.20 0.53 0.38 0.71
## 
## Slope of EDSSbaseline when subtype = PPMS: 
##  Est. S.E.   df    p
##  0.27 0.46 0.58 0.56

Use interact_plot() from jtools to plot the adjusted slopes per group.

edss.plot <- interact_plot(fit.edss2, pred = "EDSSbaseline", modx = "subtype",
                           plot.points = T,
                           interval = T,
                           vary.lty = T,
                           facet.modx = T,
                           x.label = "EDSS", y.label = "Brain-PAD (years)",
                           point.size = 1, 
                           modx.labels = c("CIS", "RRMS", "SPMS", "PPMS")) +
  geom_hline(yintercept = 0, lty = 2) +
  scale_fill_manual(values = ms.palette[2:5], name = "MS subtype") +
  scale_color_manual(values = ms.palette[2:5], name = "MS subtype") +
  theme_cowplot()
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/EDSS_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)

Age at diagnosis

LME accounting for fixed effects of age at baseline, age^2, gender, and random effects of Cohort and field strength. Exclude CIS patients and healthy controls.

summary(run_lm("disease_onset_age", subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS")))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 5946.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7293 -0.6049 -0.0352  0.6113  4.2337 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept) 10.89    3.300   
##  Residual             42.68    6.533   
## Number of obs: 900, groups:  Cohort, 14
## 
## Fixed effects:
##                         Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)            134.74403    4.06450  766.40479  33.151  < 2e-16
## data[[var]]             -0.16109    0.03591  891.85866  -4.486 8.19e-06
## poly(age_at_scan, 2)1 -117.99406   11.97374  892.65321  -9.854  < 2e-16
## poly(age_at_scan, 2)2  -13.86667    6.67536  883.28360  -2.077  0.03806
## gendermale              -0.18540    0.47044  881.60719  -0.394  0.69361
## field_strength3T         2.78246    0.96484  189.74899   2.884  0.00438
## wb_vol_ratio_icv      -151.57879    5.24069  888.63597 -28.923  < 2e-16
##                          
## (Intercept)           ***
## data[[var]]           ***
## poly(age_at_scan, 2)1 ***
## poly(age_at_scan, 2)2 *  
## gendermale               
## field_strength3T      ** 
## wb_vol_ratio_icv      ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.005                                       
## ply(g__,2)1 -0.278 -0.743                                
## ply(g__,2)2  0.047  0.036 -0.033                         
## gendermale  -0.217 -0.086  0.125    0.067                
## fld_strng3T -0.097  0.062 -0.055   -0.004   -0.042       
## wb_vl_rt_cv -0.926 -0.287  0.492   -0.060    0.199 -0.010

When predicting brain-PAD in a LME model, the effect of age at diagnosis at baseline beta = -0.16, 95% CI = -0.23, -0.09, p = 8.1884e-06.

Test for interaction between subtype and age at diagnosis on brain-PAD:

fit.age <- lmer(BrainPAD ~ disease_onset_age * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS"))
round(as.matrix(anova(fit.age)["disease_onset_age:subtype",]),3)
##                           Sum Sq Mean Sq NumDF   DenDF F value Pr(>F)
## disease_onset_age:subtype 51.344  25.672     2 877.313     0.6  0.549

Use simple slopes from jtools to extract adjusted slopes for each subtype.

fit.age2 <- lmer(BrainPAD ~ disease_onset_age * subtype + age_at_baseline_scan1 + gender + field_strength + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS"))
sim_slopes(fit.age2, pred = "disease_onset_age", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS 
## 
## Slope of disease_onset_age when subtype = RRMS: 
##   Est. S.E.    df    p
##  -0.37 0.05 -6.75 0.00
## 
## Slope of disease_onset_age when subtype = SPMS: 
##   Est. S.E.    df    p
##  -0.57 0.09 -6.39 0.00
## 
## Slope of disease_onset_age when subtype = PPMS: 
##   Est. S.E.    df    p
##  -0.51 0.10 -5.35 0.00
age.plot <- interact_plot(fit.age2, pred = "disease_onset_age", modx = "subtype",
                          plot.points = T, interval = T, vary.lty = T, facet.modx = T, 
                          x.label = "Age at clincial diagnosis (years)", y.label = "Brain-PAD (years)",
                          point.size = 1,
                          modx.labels = c("RRMS", "SPMS", "PPMS")) + 
  geom_hline(yintercept = 0, lty = 2) + 
  scale_fill_manual(values = ms.palette[3:5], name = "MS subtype") +
  scale_color_manual(values = ms.palette[3:5], name = "MS subtype") +
  theme_cowplot()
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/diagnosis_age_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)

Time since diagnosis

LME accounting for fixed effects of age at baseline, age^2 gender, and random effects of Cohort and field strength. Exclude controls, CIS patients and anyone with a time since diagnosis = 0.

summary(run_lm("disease_duration_at_baseline_scan1", subset(df.bl, df.bl$subtype != "CIS" & df.bl$disease_duration > 0)))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 5677.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7809 -0.6013 -0.0357  0.6135  4.1849 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept) 10.79    3.284   
##  Residual             43.44    6.591   
## Number of obs: 857, groups:  Cohort, 14
## 
## Fixed effects:
##                         Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)            128.02789    4.39562  774.82522  29.126  < 2e-16
## data[[var]]              0.15577    0.03642  849.23218   4.277 2.11e-05
## poly(age_at_scan, 2)1 -165.16100    8.38789  849.56458 -19.690  < 2e-16
## poly(age_at_scan, 2)2  -13.78655    6.71476  839.99378  -2.053  0.04037
## gendermale              -0.13429    0.48410  838.71263  -0.277  0.78153
## field_strength3T         2.65557    0.97568  181.83682   2.722  0.00712
## wb_vol_ratio_icv      -151.65033    5.34831  845.15051 -28.355  < 2e-16
##                          
## (Intercept)           ***
## data[[var]]           ***
## poly(age_at_scan, 2)1 ***
## poly(age_at_scan, 2)2 *  
## gendermale               
## field_strength3T      ** 
## wb_vol_ratio_icv      ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.339                                       
## ply(g__,2)1 -0.266 -0.317                                
## ply(g__,2)2  0.058 -0.024 -0.008                         
## gendermale  -0.222  0.082  0.062    0.060                
## fld_strng3T -0.067 -0.053  0.009   -0.006   -0.043       
## wb_vl_rt_cv -0.971  0.283  0.303   -0.063    0.187 -0.014

When predicting brain-PAD in a LME model, the effect of time since diagnosis at baseline beta = 0.16, 95% CI = 0.08, 0.23, p = 2.1098e-05.

Test for interaction between subtype and time since diagnosis on brain-PAD

fit.time <- lmer(BrainPAD ~ disease_duration_at_baseline_scan1 * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS" & df.bl$disease_duration > 0))
round(as.matrix(anova(fit.time)["disease_duration_at_baseline_scan1:subtype",]),3)
##                                            Sum Sq Mean Sq NumDF   DenDF
## disease_duration_at_baseline_scan1:subtype 84.065  42.032     2 797.105
##                                            F value Pr(>F)
## disease_duration_at_baseline_scan1:subtype   0.966  0.381

Use simple slopes from jtools to extract adjusted slopes for each subtype.

fit.time2 <- lmer(BrainPAD ~ disease_duration_at_baseline_scan1 * subtype + age_at_baseline_scan1 + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS" & df.bl$disease_duration > 0))
sim_slopes(fit.time2, pred = "disease_duration_at_baseline_scan1", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS 
## 
## Slope of disease_duration_at_baseline_scan1 when subtype = RRMS: 
##  Est. S.E.   df    p
##  0.19 0.04 4.29 0.00
## 
## Slope of disease_duration_at_baseline_scan1 when subtype = SPMS: 
##  Est. S.E.   df    p
##  0.08 0.07 1.06 0.29
## 
## Slope of disease_duration_at_baseline_scan1 when subtype = PPMS: 
##  Est. S.E.   df    p
##  0.02 0.13 0.12 0.90
time.plot <- interact_plot(fit.time2, pred = "disease_duration_at_baseline_scan1", modx = "subtype",
                           plot.points = T, interval = T, vary.lty = T, facet.modx = T, point.size = 1,
                           x.label = "Time since clinical diagnosis (years)", y.label = "Brain-PAD (years)",
                           modx.labels = c("RRMS", "SPMS", "PPMS")) +
  geom_hline(yintercept = 0, lty = 2) +
  scale_fill_manual(values = ms.palette[3:5], name = "MS subtype") +
  scale_color_manual(values = ms.palette[3:5], name = "MS subtype") +
  theme_cowplot()
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/diagnosis_years_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)

Arrange EDSS, age at diagnosis and time since diagnosis plots

Use cowplot package.

cowplot::plot_grid(edss.plot, age.plot, time.plot, labels = "AUTO", ncol = 1)

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/clinical_brain-PAD_plots.pdf", height = 15, width = 8, useDingbats = FALSE)

Brain age adjusted for brain volumes

Function to get standardised beta coefficients from lmer, from stack exchange: https://stats.stackexchange.com/questions/123366/lmer-standardized-regression-coefficients

lm.beta.lmer <- function(mod) {
   b <- fixef(mod)[-1]
   sd.x <- apply(getME(mod,"X")[,-1],2,sd)
   sd.y <- sd(getME(mod,"y"))
   b*sd.x/sd.y
}

Baseline clinical measures, adjusting for whole-brain volume as ratio of ICV, along with standard covariates.

EDSS_baseline <- lm.beta.lmer(lmer(EDSSbaseline ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
Time_since_diagnosis <- lm.beta.lmer(model <- lmer(disease_duration ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
Age_at_onset <- lm.beta.lmer(lmer(disease_onset_age ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
round(as.data.frame(rbind(EDSS_baseline, Time_since_diagnosis, Age_at_onset)),3)
##                      BrainPAD wb_vol_ratio_icv poly(age_at_scan, 2)1
## EDSS_baseline           0.141           -0.143                 0.265
## Time_since_diagnosis    0.245           -0.102                 0.407
## Age_at_onset           -0.202            0.086                 0.774
##                      poly(age_at_scan, 2)2 gendermale field_strength3T
## EDSS_baseline                        0.096      0.000           -0.068
## Time_since_diagnosis                 0.074     -0.041           -0.062
## Age_at_onset                        -0.065      0.028            0.044

DMT

Effects of disease modifying treatment on brain-PAD

table(df.bl$DMT_YesNoNA, df.bl$subtype)
##      
##       control CIS RRMS SPMS PPMS
##   NO        0 234  285   52  104
##   YES       0  60  356   51    8

Descriptive statistics brain-PAD by DMT status

with(subset(df.bl, df.bl$control0rest1 == "MS"), describeBy(BrainPAD, DMT_YesNoNA))
## 
##  Descriptive statistics by group 
## group: NO
##    vars   n mean   sd median trimmed  mad    min   max range skew kurtosis
## X1    1 675 8.21 9.12   6.99    7.63 8.49 -13.07 47.21 60.28  0.7     0.79
##      se
## X1 0.35
## -------------------------------------------------------- 
## group: YES
##    vars   n  mean    sd median trimmed   mad    min   max range skew
## X1    1 475 14.22 10.66  13.28   13.78 10.87 -18.18 48.67 66.85 0.37
##    kurtosis   se
## X1    -0.01 0.49

Fit LME model

fit_DMT <- lmer(BrainPAD ~ 
                  subtype +
                  poly(age_at_scan, 2) +
                  gender +
                  field_strength +
                  wb_vol_ratio_icv +
                  DMT_YesNoNA +
                  (1|Cohort),
                data = df.bl,
                control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit_DMT, pairwise ~ DMT_YesNoNA)
## $emmeans
##  DMT_YesNoNA emmean    SE   df lower.CL upper.CL
##  NO            10.2 0.696 21.3     8.76     11.6
##  YES           12.1 0.755 27.6    10.56     13.7
## 
## Results are averaged over the levels of: subtype, gender, field_strength 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE   df t.ratio p.value
##  NO - YES    -1.91 0.49 1072 -3.898  0.0001 
## 
## Results are averaged over the levels of: subtype, gender, field_strength 
## Degrees-of-freedom method: kenward-roger

EDSS progression survival analysis

Based on Arman Eshaghi’s code used in Eshaghi et al., 2018 Annals of Neurology. Function for characterising EDSS progression, based on different rates of change and different baseline EDSS values

is_sustained_progression <- function(edssAtStart, change){
  sustainedProgression <- FALSE
  #if start of edss is 0, 1.5 increase is considered sustained progression
  if ((edssAtStart < 1) & (change >= 1.5)) {
    sustainedProgression <- TRUE
  }
  #if start of edss is 6 or above, 0.5 increase is considered sustained progression
  else if ((edssAtStart >= 6) & (change >= 0.5 )) {
    sustainedProgression <- TRUE
  }
  #if start of edss is more than zero but less than 6, sustained progression is by 1 increase in edss
  else if ((edssAtStart >= 1 ) & (edssAtStart < 6 )  & (change >= 1  )) {
    sustainedProgression <- TRUE
  }
  return(sustainedProgression)
}

Determine change in EDSS from baseline to last follow-up

Select latest EDSS per subject in subjects with 2 or more assessments

y1 <- df %>%
  filter(!subtype == "control")  %>%
  filter(!is.na(EDSSbaseline)) %>%
  group_by(PatientID) %>%
  top_n(1, interval) %>%
  ungroup() %>%
  dplyr::rename(latest_EDSS = EDSSatScan) %>%
  dplyr::select(PatientID, interval, EDSSbaseline, latest_EDSS) %>%
  filter(!is.na(latest_EDSS)) %>%
  mutate(EDSSchange = latest_EDSS - EDSSbaseline)

# apply Arman's function
y1$EDSS_progression <-  mapply(is_sustained_progression, y1$EDSSbaseline, y1$EDSSchange) 

## get baseline brain-PAD and brain volumetric measures
y2 <- df %>%
  filter(!subtype == "control")  %>%
  filter(interval == 0) %>%
  filter(!is.na(EDSSbaseline)) %>%
  dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
  dplyr::rename(GM_vol_baseline = GM_vol) %>%
  dplyr::rename(WM_vol_baseline = WM_vol) %>%
  dplyr::rename(CSF_vol_baseline = CSF_vol) %>%
  dplyr::rename(WBV_baseline = WBV) %>%
  dplyr::rename(ICV_baseline = ICV) %>%
  dplyr::select(-one_of('interval'))

y3 <- right_join(y1, y2, by = c("PatientID")) %>%
  filter(!is.na(latest_EDSS))

Numbers of EDSS progressors

The number of MS patients with >= 2 EDSS scores was 1143.

table(y3$EDSS_progression) # calculate proportion of patients who progress
## 
## FALSE  TRUE 
##   840   303
round(prop.table(table(y3$EDSS_progression)),3) # calculate percentage of patients who progress
## 
## FALSE  TRUE 
## 0.735 0.265

Run survival analysis

Using brain-PAD with normal covariates

# creating new response function
S <- Surv(time = y3$interval, event = y3$EDSS_progression)
# Brain-PAD, age, sex model
surv.model <- coxph(S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,2) + gender + field_strength, data = y3)
summary(surv.model)
## Call:
## coxph(formula = S ~ BrainPAD_baseline + poly(age_at_baseline_scan1, 
##     2) + gender + field_strength, data = y3)
## 
##   n= 1143, number of events= 303 
## 
##                                      coef exp(coef)  se(coef)     z
## BrainPAD_baseline               2.301e-02 1.023e+00 5.801e-03 3.966
## poly(age_at_baseline_scan1, 2)1 1.106e+01 6.330e+04 2.117e+00 5.223
## poly(age_at_baseline_scan1, 2)2 8.872e-01 2.428e+00 2.072e+00 0.428
## gendermale                      2.317e-01 1.261e+00 1.202e-01 1.927
## field_strength3T                3.472e-01 1.415e+00 1.705e-01 2.037
##                                 Pr(>|z|)    
## BrainPAD_baseline               7.32e-05 ***
## poly(age_at_baseline_scan1, 2)1 1.76e-07 ***
## poly(age_at_baseline_scan1, 2)2   0.6686    
## gendermale                        0.0540 .  
## field_strength3T                  0.0417 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                 exp(coef) exp(-coef) lower .95 upper .95
## BrainPAD_baseline                   1.023  0.9772581   1.01170 1.035e+00
## poly(age_at_baseline_scan1, 2)1 63298.139  0.0000158 998.79076 4.012e+06
## poly(age_at_baseline_scan1, 2)2     2.428  0.4118114   0.04182 1.410e+02
## gendermale                          1.261  0.7931507   0.99607 1.596e+00
## field_strength3T                    1.415  0.7066566   1.01319 1.976e+00
## 
## Concordance= 0.648  (se = 0.017 )
## Rsquare= 0.051   (max possible= 0.953 )
## Likelihood ratio test= 60.25  on 5 df,   p=1e-11
## Wald test            = 62.84  on 5 df,   p=3e-12
## Score (logrank) test = 65.45  on 5 df,   p=9e-13
# Check the assumptions of proportional hazards are met
cox.zph(surv.model)
##                                       rho    chisq        p
## BrainPAD_baseline                0.145349 7.54e+00 0.006035
## poly(age_at_baseline_scan1, 2)1 -0.068122 1.44e+00 0.229532
## poly(age_at_baseline_scan1, 2)2  0.166316 7.78e+00 0.005279
## gendermale                      -0.000737 1.67e-04 0.989698
## field_strength3T                -0.163475 9.14e+00 0.002502
## GLOBAL                                 NA 2.22e+01 0.000485

Check that the brain-PAD line is horizontal.

plot(cox.zph(surv.model)[1])

The hazard ratio for brain-PAD on time-to-disease-progression was HR (95% CI) = 1.023, 1.012, 1.035. That means for every additional +1 year of brain-PAD there is a 1.023% increase in the likelihood of EDSS progression. Extrapolated over 5 years of brain-PAD, there is a 1.122 increase in the likelihood of EDSS progression.

Adding normalised brain volume as a covariate

# creating new response function
S <- Surv(time = y3$interval, event = y3$EDSS_progression)
# Brain-PAD, age, sex model
surv.model <- coxph(S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv, data = y3)
summary(surv.model)
## Call:
## coxph(formula = S ~ BrainPAD_baseline + poly(age_at_baseline_scan1, 
##     2) + gender + field_strength + wb_vol_ratio_icv, data = y3)
## 
##   n= 1143, number of events= 303 
## 
##                                       coef  exp(coef)   se(coef)      z
## BrainPAD_baseline               -4.807e-03  9.952e-01  7.826e-03 -0.614
## poly(age_at_baseline_scan1, 2)1  2.097e+00  8.144e+00  2.707e+00  0.775
## poly(age_at_baseline_scan1, 2)2 -2.016e-01  8.174e-01  2.086e+00 -0.097
## gendermale                       4.873e-02  1.050e+00  1.252e-01  0.389
## field_strength3T                 3.860e-01  1.471e+00  1.712e-01  2.254
## wb_vol_ratio_icv                -9.416e+00  8.141e-05  1.728e+00 -5.448
##                                 Pr(>|z|)    
## BrainPAD_baseline                 0.5390    
## poly(age_at_baseline_scan1, 2)1   0.4385    
## poly(age_at_baseline_scan1, 2)2   0.9230    
## gendermale                        0.6971    
## field_strength3T                  0.0242 *  
## wb_vol_ratio_icv                5.11e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                 exp(coef) exp(-coef) lower .95 upper .95
## BrainPAD_baseline               9.952e-01  1.005e+00 9.801e-01 1.011e+00
## poly(age_at_baseline_scan1, 2)1 8.144e+00  1.228e-01 4.044e-02 1.640e+03
## poly(age_at_baseline_scan1, 2)2 8.174e-01  1.223e+00 1.369e-02 4.880e+01
## gendermale                      1.050e+00  9.524e-01 8.215e-01 1.342e+00
## field_strength3T                1.471e+00  6.798e-01 1.052e+00 2.058e+00
## wb_vol_ratio_icv                8.141e-05  1.228e+04 2.750e-06 2.410e-03
## 
## Concordance= 0.661  (se = 0.018 )
## Rsquare= 0.074   (max possible= 0.953 )
## Likelihood ratio test= 87.89  on 6 df,   p=<2e-16
## Wald test            = 96.81  on 6 df,   p=<2e-16
## Score (logrank) test = 98.77  on 6 df,   p=<2e-16
# Check the assumptions of proportional hazards are met
cox.zph(surv.model)
##                                     rho  chisq        p
## BrainPAD_baseline                0.0804  2.224 0.135892
## poly(age_at_baseline_scan1, 2)1 -0.0893  2.830 0.092543
## poly(age_at_baseline_scan1, 2)2  0.1581  7.318 0.006827
## gendermale                      -0.0284  0.268 0.604561
## field_strength3T                -0.1616  8.835 0.002955
## wb_vol_ratio_icv                -0.0612  1.328 0.249135
## GLOBAL                               NA 24.602 0.000405

Time-to-EDSS progression Kaplan-Meier plots

Run survplot on survival object

Based on a median split of brain-PAD.

km.plot.df <- y3 %>% mutate(split_BrainPAD = ntile(BrainPAD_baseline, 2))
str(km.plot.df)
## Classes 'tbl_df', 'tbl' and 'data.frame':    1143 obs. of  39 variables:
##  $ PatientID                         : Factor w/ 1367 levels "AMSTERDAM_4001",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ interval                          : num  2.32 2.35 2.32 1.23 1.23 ...
##  $ EDSSbaseline.x                    : num  2.5 3 4 2.5 7.5 4 2 6 2 6.5 ...
##  $ latest_EDSS                       : num  3 2.5 4 4 7.5 4 2 6 2.5 6.5 ...
##  $ EDSSchange                        : num  0.5 -0.5 0 1.5 0 0 0 0 0.5 0 ...
##  $ EDSS_progression                  : logi  FALSE FALSE FALSE TRUE FALSE FALSE ...
##  $ Cohort                            : Factor w/ 15 levels "Amsterdam","Barcelona",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ scanner_status                    : Factor w/ 4 levels "1.5T","1.5T_post_2004",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gender                            : Factor w/ 2 levels "female","male": 2 1 1 1 1 1 1 1 2 2 ...
##  $ control0ppms1cisoforever2other3   : int  3 3 3 3 3 3 3 3 3 1 ...
##  $ control0rest1                     : Factor w/ 2 levels "control","MS": 2 2 2 2 2 2 2 2 2 2 ...
##  $ control0ms1cis2                   : Factor w/ 3 levels "CIS","control",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ age_at_baseline_scan1             : num  57.8 44.4 28.3 44.8 39 ...
##  $ disease_duration_at_baseline_scan1: num  6.8 6.38 6.29 6.79 19.02 ...
##  $ DMT_YesNoNA                       : Factor w/ 2 levels "NO","YES": 1 1 2 2 2 1 2 2 2 1 ...
##  $ DMTYes1                           : Factor w/ 2 levels "No treatment",..: 1 1 2 2 2 1 2 2 2 1 ...
##  $ EDSSbaseline.y                    : num  2.5 3 4 2.5 7.5 4 2 6 2 6.5 ...
##  $ NoScans                           : int  3 3 3 2 2 3 3 3 3 2 ...
##  $ FUTimeMax                         : num  2.32 2.35 2.32 1.23 1.23 ...
##  $ ScanName                          : Factor w/ 3603 levels "AMSTERDAM_4001_baseline_t1",..: 1 4 7 10 12 14 17 20 23 26 ...
##  $ age_at_scan                       : num  57.8 44.4 28.3 44.8 39 ...
##  $ EDSSatScan                        : num  2.5 3 4 2.5 7.5 4 2 6 2 6.5 ...
##  $ BrainAge                          : num  69.6 58.1 52.4 54 73 ...
##  $ BrainPAD_baseline                 : num  11.75 13.7 24.09 9.22 33.99 ...
##  $ subtype                           : Factor w/ 5 levels "control","CIS",..: 3 3 3 3 4 3 3 3 3 5 ...
##  $ disease_onset_age                 : num  51 38 22 38 20 ...
##  $ disease_duration                  : num  6.8 6.38 6.29 6.79 19.02 ...
##  $ scan_number                       : Factor w/ 10 levels "scan1","scan10",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ GM_vol_baseline                   : num  0.574 0.622 0.647 0.648 0.575 0.609 0.641 0.56 0.712 0.635 ...
##  $ WM_vol_baseline                   : num  0.536 0.43 0.404 0.599 0.394 0.493 0.503 0.382 0.496 0.534 ...
##  $ CSF_vol_baseline                  : num  0.352 0.293 0.355 0.271 0.45 0.259 0.396 0.384 0.337 0.47 ...
##  $ WBV_baseline                      : num  1.11 1.052 1.051 1.247 0.969 ...
##  $ ICV_baseline                      : num  1.46 1.34 1.41 1.52 1.42 ...
##  $ gm_vol_ratio_icv                  : num  0.393 0.462 0.46 0.427 0.405 ...
##  $ wm_vol_ratio_icv                  : num  0.367 0.32 0.287 0.395 0.278 ...
##  $ csf_vol_ratio_icv                 : num  0.241 0.218 0.252 0.179 0.317 ...
##  $ wb_vol_ratio_icv                  : num  0.759 0.782 0.748 0.821 0.683 ...
##  $ field_strength                    : Factor w/ 2 levels "1.5T","3T": 1 1 1 1 1 1 1 1 1 1 ...
##  $ split_BrainPAD                    : int  2 2 2 1 2 1 2 1 2 2 ...
S <- Surv(time = km.plot.df$interval, event = km.plot.df$EDSS_progression) # response function
# survplot <- ggsurvplot(survfit(S ~ split_BrainPAD, data = km.plot.df),
#                        surv.plot.height = 0.9,
#                        ggtheme = theme_survminer(),
#                        risk.table = T,
#                        cumcensor = F,
#                        conf.int = F,
#                        palette = c("blue", "red"),
#                        censor = F,
#                        xlab = "Time (years)",
#                        legend.labs = c("Brain-PAD < median", "Brain-PAD > median"))
# survplot
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/KM_brain-PAD_plot.pdf", height = 8, width = 8, print(survplot, newpage = FALSE), useDingbats = FALSE)

The median value = 9.68 years.

Longitudinal brain-age analysis

The total number of people with two or more scans was n = 1266. This included n = 1155 MS patients and n = 111 controls.

With 3 or more scans, there were n = 509 MS patients and n = 64 controls.

Determine change in brain-PAD from baseline to last follow-up

## select latest brain-PAD per subject in subjects with 2 or more assessments
z1 <- df %>%
  filter(NoScans >= 2)  %>%
  group_by(PatientID) %>%
  top_n(1, interval) %>%
  ungroup() %>%
  dplyr::rename(latest_BrainPAD = BrainPAD) %>%
  dplyr::rename(latest_wb_vol_ratio_icv = wb_vol_ratio_icv) %>%
  dplyr::select(PatientID, interval, latest_BrainPAD, latest_wb_vol_ratio_icv)

## baseline brain-PAD
z2 <- df %>%
  filter(NoScans >= 2)  %>%
  filter(interval == 0) %>%
  filter(!is.na(BrainPAD)) %>%
  dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
  #dplyr::rename(GM_vol_baseline = GM_vol) %>%
  #dplyr::rename(WM_vol_baseline = WM_vol) %>%
  dplyr::rename(wb_vol_baseline = wb_vol_ratio_icv) %>%
  dplyr::select(-one_of('interval'))

## calculate change in brain-PAD between baseline and latest brain-PAD
z3 <- right_join(z1, z2, by = c("PatientID")) %>%
  mutate(BrainPAD_change = latest_BrainPAD - BrainPAD_baseline) %>%
  mutate(wb_vol_ratio_icv_change = latest_wb_vol_ratio_icv - wb_vol_baseline)

Mean annualised rates of change in brain-PAD per group

describeBy(z3$BrainPAD_change/z3$interval, z3$subtype, mat = T, digits = 2)
##     item  group1 vars   n  mean   sd median trimmed  mad    min   max
## X11    1 control    1 111  0.03 2.02  -0.09   -0.12 1.14  -4.65  9.86
## X12    2     CIS    1 279  0.23 2.20   0.12    0.21 1.21 -11.74 11.49
## X13    3    RRMS    1 652  0.29 1.70   0.17    0.22 1.07 -12.24 16.21
## X14    4    SPMS    1 104 -0.29 1.60  -0.24   -0.27 0.93  -6.11  7.01
## X15    5    PPMS    1 119  0.40 1.52   0.30    0.40 1.38  -4.66  4.24
##     range  skew kurtosis   se
## X11 14.52  1.75     7.25 0.19
## X12 23.23  0.08     6.74 0.13
## X13 28.45  0.98    18.93 0.07
## X14 13.12  0.31     4.84 0.16
## X15  8.90 -0.18     0.62 0.14

Determine change in EDSS from baseline to last follow-up

## select latest EDSS per subject in subjects with 2 or more assessments
a1 <- df %>%
  filter(NoScans >= 2)  %>%
  group_by(PatientID) %>%
  top_n(1, interval) %>%
  ungroup() %>%
  dplyr::rename(latest_EDSSatScan = EDSSatScan) %>%
  dplyr::select(PatientID, interval, latest_EDSSatScan) 

## baseline EDSS
a2 <- df %>%
  filter(NoScans >= 2)  %>%
  filter(interval == 0) %>%
  filter(!is.na(EDSSatScan)) %>%
  dplyr::rename(EDSSatScan_baseline = EDSSatScan) %>%
  dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
  #dplyr::rename(GM_vol_baseline = GM_vol) %>%
  #dplyr::rename(WM_vol_baseline = WM_vol) %>%
  dplyr::rename(WB_vol_baseline = wb_vol_ratio_icv) %>%
  dplyr::select(-one_of('interval'))

## calculate change in brain-PAD between baseline and latest brain-PAD
a3 <- right_join(a1, a2, by = c("PatientID")) %>%
  mutate(EDSS_change = latest_EDSSatScan - EDSSatScan_baseline)

Mean annualised rates of change in EDSS per group

with(subset(a3, a3$control0rest1 != "control"), describeBy(EDSS_change/interval, subtype, mat = F, digits = 2))
## 
##  Descriptive statistics by group 
## group: control
## NULL
## -------------------------------------------------------- 
## group: CIS
##    vars   n  mean   sd median trimmed mad   min max range  skew kurtosis
## X1    1 242 -0.26 1.05      0   -0.14 0.3 -6.86   3  9.86 -1.96     8.47
##      se
## X1 0.07
## -------------------------------------------------------- 
## group: RRMS
##    vars   n mean   sd median trimmed  mad   min  max range skew kurtosis
## X1    1 635 0.12 0.45      0     0.1 0.19 -2.24 3.29  5.53 1.09    10.49
##      se
## X1 0.02
## -------------------------------------------------------- 
## group: SPMS
##    vars   n mean   sd median trimmed  mad   min  max range skew kurtosis
## X1    1 104 0.14 0.29      0    0.11 0.07 -0.64 1.26   1.9 0.92     2.25
##      se
## X1 0.03
## -------------------------------------------------------- 
## group: PPMS
##    vars   n mean   sd median trimmed  mad   min max range skew kurtosis
## X1    1 117 0.36 0.63   0.17    0.27 0.25 -1.01   3  4.01 1.92     5.35
##      se
## X1 0.06

Relationship between annualised EDSS change and brain-PAD change

The total number of patients with two or more scans was n = 1155.

delta.df <- join(a3, z3)
with(delta.df, cor.test(EDSS_change, BrainPAD_change, method = "pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  EDSS_change and BrainPAD_change
## t = 9.0089, df = 1095, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2067138 0.3169474
## sample estimates:
##       cor 
## 0.2626875
with(delta.df[complete.cases(delta.df),], pcor.test(EDSS_change, BrainPAD_change, wb_vol_ratio_icv_change, method = "pearson"))
##     estimate    p.value statistic   n gp  Method
## 1 0.08117744 0.01126417  2.539243 975  1 pearson

Historgams of baseline EDSS scores and EDSS changes

plot1 <- ggplot(df.bl, aes(x = EDSSbaseline)) + geom_histogram(binwidth = 1, fill = "dodgerblue", colour = "black") + xlab("EDSS at baseline") + theme_cowplot()
plot2 <- ggplot(delta.df, aes(x = EDSS_change)) + geom_histogram(binwidth = 1, fill = "dodgerblue", colour = "black") + xlab("EDSS change") + theme_cowplot()
plot3 <- ggplot(subset(df.bl, df.bl$control0rest1 == "MS"), aes(x = BrainPAD)) + geom_histogram(binwidth = 1, fill = "darkgoldenrod2", colour = "black") + xlab("Brain-PAD at baseline") + theme_cowplot()
plot4 <- ggplot(subset(delta.df, delta.df$control0rest1 == "MS"), aes(x = BrainPAD_change)) + geom_histogram(binwidth = 1, fill = "darkgoldenrod2", colour = "black") + xlab("Brain-PAD change") + theme_cowplot()
plot5 <- ggplot(delta.df, aes(x = EDSS_change, y = BrainPAD_change)) + geom_point(aes(colour = subtype)) + geom_smooth(method = "lm", colour = "black") + labs(x = "EDSS change", y = "Brain-PAD change") + theme_cowplot() + scale_color_manual(values = ms.palette[-1], name = "MS subtype")

pg <- cowplot::plot_grid(cowplot::plot_grid(plot1, plot2, plot3, plot4, labels = "AUTO", ncol = 2), plot5, ncol = 2, labels = c("", "E"), rel_widths = c(2,1.5))
pg

cowplot::save_plot("~/Work/Brain ageing/Collaborations/MS/plots/histograms_change_EDSS_brain-PAD_plot.pdf", pg, base_asp = 2.5)

Interaction between subtype and EDSS change

fit.change <- lm(BrainPAD_change ~ EDSS_change * subtype, data = delta.df)
anova(fit.change)
## Analysis of Variance Table
## 
## Response: BrainPAD_change
##                       Df  Sum Sq Mean Sq F value    Pr(>F)    
## EDSS_change            1  1452.1 1452.13 82.4760 < 2.2e-16 ***
## subtype                3   212.1   70.68  4.0146  0.007441 ** 
## EDSS_change:subtype    3   206.0   68.68  3.9008  0.008702 ** 
## Residuals           1089 19173.7   17.61                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Covarying for change in normalised brain volumes

fit.change2 <- lm(BrainPAD_change ~ EDSS_change * subtype + wb_vol_ratio_icv_change, data = delta.df)
anova(fit.change2)
## Analysis of Variance Table
## 
## Response: BrainPAD_change
##                           Df  Sum Sq Mean Sq  F value    Pr(>F)    
## EDSS_change                1  1452.1  1452.1 117.0349 < 2.2e-16 ***
## subtype                    3   212.1    70.7   5.6968 0.0007184 ***
## wb_vol_ratio_icv_change    1  5769.5  5769.5 464.9914 < 2.2e-16 ***
## EDSS_change:subtype        3   110.7    36.9   2.9750 0.0307508 *  
## Residuals               1088 13499.6    12.4                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Use jtools package to get slopes from the model, per subtype.

sim_slopes(fit.change, pred = "EDSS_change", modx = "subtype", johnson_neyman = F, digits = 4)
## SIMPLE SLOPES ANALYSIS 
## 
## Slope of EDSS_change when subtype = CIS: 
##    Est.   S.E. t val.      p
##  0.8434 0.2189 3.8524 0.0001
## 
## Slope of EDSS_change when subtype = RRMS: 
##    Est.   S.E. t val.      p
##  1.2534 0.1459 8.5883 0.0000
## 
## Slope of EDSS_change when subtype = SPMS: 
##     Est.   S.E.  t val.      p
##  -0.6953 0.6510 -1.0680 0.2857
## 
## Slope of EDSS_change when subtype = PPMS: 
##    Est.   S.E. t val.      p
##  0.5882 0.3464 1.6983 0.0897
interact_plot(fit.change, pred = "EDSS_change", modx = "subtype", plot.points = T, interval = T, facet.modx = T, x.label = "EDSS annualised change", y.label = "Brain-PAD annualised change", modx.labels = c("CIS", "RRMS", "SPMS", "PPMS")) + geom_hline(yintercept = 0, lty = 2) + theme_cowplot()  +
  scale_fill_manual(values = ms.palette[-1], name = "MS subtype") +
  scale_color_manual(values = ms.palette[-1], name = "MS subtype")

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/change_EDSS_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)

Correlate baseline brain-PAD with the number of follow-up scans completed in the n=104 with >1 scan.

with(subset(df.bl, df.bl$NoScans > 1 & df.bl$subtype == "SPMS"), cor.test(BrainPAD, NoScans, method = "spearman"))
## Warning in cor.test.default(BrainPAD, NoScans, method = "spearman"): Cannot
## compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  BrainPAD and NoScans
## S = 241780, p-value = 0.002848
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## -0.289771

Longitudinal brain-predicted age trajectories

Interaction between group and time MS vs. controls

Conditional growth model - random effects of participant and cohort

model_int.group <- lmer(BrainPAD ~ control0rest1 * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = df, control = lmerControl(optimizer = "Nelder_Mead")) 
round(anova(model_int.group)["control0rest1:interval",],3)
## Type III Analysis of Variance Table with Satterthwaite's method
##                        Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)  
## control0rest1:interval 27.695  27.695     1 1325.5   5.845  0.016 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model_int.group)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ control0rest1 * interval + poly(age_at_baseline_scan1,  
##     2) + gender + field_strength + (interval | PatientID) + (1 |  
##     Cohort)
##    Data: df
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 21373.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.7410 -0.3578 -0.0138  0.3453  8.6034 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev. Corr 
##  PatientID (Intercept) 82.8868  9.1042        
##            interval     0.4612  0.6791   -0.06
##  Cohort    (Intercept) 15.2072  3.8996        
##  Residual               4.7381  2.1767        
## Number of obs: 3564, groups:  PatientID, 1354; Cohort, 15
## 
## Fixed effects:
##                                   Estimate Std. Error         df t value
## (Intercept)                        0.05128    1.44705   38.42067   0.035
## control0rest1MS                   12.14169    1.02786 1258.53795  11.813
## interval                          -0.16552    0.14960 1374.14620  -1.106
## poly(age_at_baseline_scan1, 2)1  -59.19725   16.33371 1335.78018  -3.624
## poly(age_at_baseline_scan1, 2)2  -33.79256   14.93286 1340.43263  -2.263
## gendermale                         1.90177    0.52626 1337.05171   3.614
## field_strength3T                  -3.25935    0.99706  347.05623  -3.269
## control0rest1MS:interval           0.37219    0.15394 1325.50262   2.418
##                                 Pr(>|t|)    
## (Intercept)                     0.971912    
## control0rest1MS                  < 2e-16 ***
## interval                        0.268735    
## poly(age_at_baseline_scan1, 2)1 0.000301 ***
## poly(age_at_baseline_scan1, 2)2 0.023797 *  
## gendermale                      0.000313 ***
## field_strength3T                0.001187 ** 
## control0rest1MS:interval        0.015754 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cn01MS intrvl p(___1,2)1 p(___1,2)2 gndrml fld_3T
## cntrl0rs1MS -0.613                                                  
## interval    -0.066  0.093                                           
## pl(___1,2)1  0.072 -0.159 -0.001                                    
## pl(___1,2)2  0.026 -0.055  0.003  0.026                             
## gendermale  -0.147  0.027  0.003  0.027      0.059                  
## fld_strng3T -0.275  0.024  0.002  0.016     -0.027     -0.041       
## cntrl0r1MS:  0.063 -0.098 -0.972  0.001     -0.003     -0.003  0.000

Use sjPlots to visualise the interactions, using estimated marginal means

plot_model(model_int.group, type = "emm", terms = c("interval [all]", "control0rest1"), colors = c("darkgreen", "firebrick"), legend.title = "Group", grid = F, show.data = F) + theme_cowplot() + labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") + geom_hline(yintercept = 0, lty = 2)

Generate estimate marginal trends for healthy controls and for all MS/CIS combined, using annualised difference between baseline and final follow-up.

# emtrends(object = model_int.group, pairwise ~ control0rest1, var = "interval")

Interaction between group and time MS subtypes

Conditional growth model - random effects of participant and cohort

model_int.subtype <- lmer(BrainPAD ~ subtype * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$subtype != "control"), control = lmerControl(optimizer = "Nelder_Mead")) 
round(anova(model_int.subtype)["subtype:interval",],3)
## Type III Analysis of Variance Table with Satterthwaite's method
##                  Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)   
## subtype:interval 78.165  26.055     3 845.79   4.987  0.002 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Generate EMMs for healthy controls and for MS subtypes, using annualised difference between baseline and final follow-up.

# emtrends(object = model_int.subtype, pairwise ~ subtype, var = "interval")
plot_model(model_int.subtype, type = "emm", terms = c("interval [all]", "subtype"), legend.title = "Group", show.data = F, grid = F) + theme_cowplot() + labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") + geom_hline(yintercept = 0, lty = 2)

Longitudinal brain-PAD by interval plots

Randomly sub-sampling 50% of the data.

sub.sample.df <- df %>%
  filter(NoScans >= 2)  %>%
  group_by(PatientID) %>%
  sample_frac(0.5) %>%
  ungroup()

ggplot(data = sub.sample.df, aes(x = interval, y = BrainPAD, fill = subtype)) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.25) +
  geom_point(aes(colour = subtype), size = 0.5, alpha = 0.5) +
  labs(x = "Time (years)", y = "Brain-predicted age difference (years)") +
  scale_fill_manual(values = ms.palette) +
  scale_color_manual(values = ms.palette) +
  # facet_grid(Cohort) +
  theme_cowplot() + theme(legend.position = "none")
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/longitudinal_brain-PAD_time_plot.pdf", height = 6, width = 6, useDingbats = FALSE)
## Warning: Removed 1 rows containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).
ggplot(data = df, aes(x = age_at_scan, y = BrainPAD, fill = subtype)) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.2) +
  # geom_smooth(method = "lm", aes(col = subtype)) +
  labs(x = "Time (years)", y = "Brain-predicted age difference (years)") +
  scale_fill_manual(values = ms.palette) +
  scale_color_manual(values = ms.palette) +
  facet_wrap(~ subtype, scales = "free_x") +
  theme_cowplot() + theme(legend.position = "none")
## Warning: Removed 1 rows containing missing values (geom_path).

Age at diagnosis and longitudinal brain-PAD

model_int.onset <- lmer(BrainPAD ~ disease_onset_age * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$control0rest1 == "MS"), control = lmerControl(optimizer = "Nelder_Mead"))
## Warning: Some predictor variables are on very different scales: consider
## rescaling

## Warning: Some predictor variables are on very different scales: consider
## rescaling
anova(model_int.onset)
## Type III Analysis of Variance Table with Satterthwaite's method
##                                Sum Sq Mean Sq NumDF   DenDF  F value
## disease_onset_age              937.33  937.33     1 1035.25 175.4335
## interval                        37.98   37.98     1  696.12   7.1079
## poly(age_at_baseline_scan1, 2) 346.90  173.45     2 1044.47  32.4633
## gender                          65.08   65.08     1 1127.12  12.1810
## field_strength                  26.03   26.03     1  179.47   4.8725
## disease_onset_age:interval       5.65    5.65     1  693.35   1.0572
##                                   Pr(>F)    
## disease_onset_age              < 2.2e-16 ***
## interval                       0.0078525 ** 
## poly(age_at_baseline_scan1, 2)   2.1e-14 ***
## gender                         0.0005014 ***
## field_strength                 0.0285553 *  
## disease_onset_age:interval     0.3042026    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Time since diagnosis and longitudinal brain-PAD

model_int.duration <- lmer(BrainPAD ~ disease_duration * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$control0rest1 == "MS"), control = lmerControl(optimizer = "Nelder_Mead"))
## Warning: Some predictor variables are on very different scales: consider
## rescaling

## Warning: Some predictor variables are on very different scales: consider
## rescaling
anova(model_int.duration)
## Type III Analysis of Variance Table with Satterthwaite's method
##                                Sum Sq Mean Sq NumDF   DenDF  F value
## disease_duration               933.97  933.97     1 1013.59 179.9523
## interval                       331.67  331.67     1  526.36  63.9038
## poly(age_at_baseline_scan1, 2) 462.49  231.25     2 1076.16  44.5554
## gender                          61.60   61.60     1 1085.24  11.8684
## field_strength                  23.44   23.44     1  171.50   4.5165
## disease_duration:interval      172.66  172.66     1  747.76  33.2681
##                                   Pr(>F)    
## disease_duration               < 2.2e-16 ***
## interval                       8.330e-15 ***
## poly(age_at_baseline_scan1, 2) < 2.2e-16 ***
## gender                         0.0005928 ***
## field_strength                 0.0350017 *  
## disease_duration:interval      1.175e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
onset.plt <- plot_model(model_int.onset, type = "pred", terms = c("interval [all]", "disease_onset_age[20,30,40,50,60]"), legend.title = "Onset age (years)", show.data = F, grid = F) +
  geom_hline(yintercept = 0, lty = 2) +
  labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") +
  theme_cowplot()
duration.plt <- plot_model(model_int.duration, type = "pred", terms = c("interval [all]", "disease_duration[0,7.5,15]"), legend.title = "Time since diagnosis (years)", show.data = F, grid = F) +
  geom_hline(yintercept = 0, lty = 2) +
  labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") +
  theme_cowplot()
cowplot::plot_grid(onset.plt, duration.plt, nrow = 1)

Plot(s) with age and estimated age for each individual from each group and site

ggplot(data = df, aes(x = age_at_scan, y = BrainAge, fill = subtype)) +
  geom_abline(slope = 1, lty = 2) +
  geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.25) +
  geom_point(aes(colour = subtype), size = 0.5, alpha = 0.5) +
  labs(x = "Age (years)", y = "Brain-predicted age (years)") +
  scale_fill_manual(values = ms.palette) +
  scale_color_manual(values = ms.palette) +
  facet_wrap(~ Cohort) +
  theme_cowplot() + theme(legend.position = "none")

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/longitudinal_brain-age_years_plot.pdf", height = 6, width = 10, useDingbats = FALSE)